/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
                                                        "5.0 (201611)";
#elif OMP_45_ENABLED
                                                        "4.5 (201511)";
#elif OMP_40_ENABLED
                                                        "4.0 (201307)";
#else
                                                        "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
        non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
        may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
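
// The internal algorithm above reduces to a containment test: because stacks
// grow down, a thread's stack occupies [stack_base - stack_size, stack_base],
// so the address of any local variable identifies the owning thread. A
// minimal, self-contained sketch of that test (illustrative only; the real
// bookkeeping lives in th->th.th_info.ds):
//
//   static bool addr_on_stack(char *addr, char *base, size_t size) {
//     return addr <= base && (size_t)(base - addr) <= size;
//   }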

int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
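
// Typical use (hedged sketch, not verbatim from any caller): an entry point
// that may be the first OpenMP call made by a foreign thread funnels through
// this _reg variant, so the thread is registered as a new root on demand:
//
//   int gtid = __kmp_get_global_thread_id_reg(); // never KMP_GTID_DNE
//   kmp_info_t *self = __kmp_threads[gtid];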

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_msg(kmp_ms_fatal, KMP_MSG(StackOverlap),
                    KMP_HNT(ChangeStackLimit), __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
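
// The conflict test above flags an overlap when either endpoint of the
// current thread's stack falls strictly inside another thread's
// [other_stack_beg, other_stack_end) extent -- a hedged restatement:
//
//   bool endpoint_inside = (p > other_beg && p < other_end); // p = beg or end
//
// Full containment of a smaller foreign stack inside ours is expected to be
// detected from the other thread's side when it runs the same check.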

/* ------------------------------------------------------------------------ */

void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}

#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}

void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }; // if

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;

    /* On Windows* OS, by default abort() causes a pop-up error box, which
       stalls nightly testing. Unfortunately, we cannot reliably suppress
       pop-up error boxes. _set_abort_behavior() works well, but this function
       is not available in VS7 (this is not a problem for the DLL, but it is a
       problem for the static OpenMP RTL). SetErrorMode (and so, the timelimit
       utility) does not help, at least in some versions of MS C RTL.

       It seems the following sequence is the only way to simulate abort() and
       avoid the pop-up error box. */
    raise(SIGABRT);
    _exit(3); // Just in case; if the signal is ignored, exit anyway.
  } else {
    abort();
  }; // if

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). The OS terminates other threads (except
  // the one calling ProcessExit or FreeLibrary). So, it might be safe to
  // access the __kmp_threads[] without taking the forkjoin_lock. However, in
  // fact, some threads can still be alive here, although they are about to be
  // terminated. The threads in the array with ds_thread==0 are the most
  // suspicious. Actually, it may not be safe to access the __kmp_threads[].

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      // lpReserved == NULL when FreeLibrary() was called,
      // lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear, triggering the problem of an unreleased
      // forkjoin lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like this is not a corner
      // case; common scenarios include:
      // - the main function was compiled by an alternative compiler;
      // - the main function was compiled by icl but without /Qopenmp
      //   (application with plugins);
      // - application terminates by calling C exit(), Fortran CALL EXIT() or
      //   Fortran STOP.
      // - alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // return previous setting of whether
  // KMP_LIBRARY=throughput
}
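
// Worked example of the even/odd encoding above (illustrative numbers): with
// __kmp_yield_init == 8 (even => KMP_LIBRARY=throughput),
// __kmp_change_library(1) returns 0 and leaves the counter at 9 (odd =>
// turnaround); a later __kmp_change_library(0) returns 1 and restores 8.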

/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
      /* accept blame for "ordered" waiting */
      kmp_info_t *this_thread = __kmp_threads[gtid];
      ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
          this_thread->th.ompt_thread_info.wait_id);
    }
#endif

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
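
// Taken together, deo/dxo implement a ticket-style handoff on
// t_ordered.dt.t_value (hedged restatement of the code above):
//
//   deo: wait until t_value == my_tid;      // my turn to enter "ordered"
//   ... ordered work ...
//   dxo: t_value = (my_tid + 1) % t_nproc;  // pass the turn to the next tid
//
// so ordered sections retire in thread-id order within the team.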

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                           th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
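
// How a compiler might bracket a "single" construct with this pair (hedged
// sketch of the calling convention, not verbatim codegen):
//
//   if (__kmp_enter_single(gtid, loc, TRUE)) {
//     ... body, executed by exactly one thread of the team ...
//     __kmp_exit_single(gtid);
//   }
//   // implicit barrier at the end of the construct unless "nowait" is given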

/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads
#if OMP_40_ENABLED
                                 ,
                                 int enter_teams
#endif /* OMP_40_ENABLED */
                                 ) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}
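
// Worked example of the thread-limit clamp above (illustrative numbers): with
// __kmp_max_nth == 8, __kmp_nth == 6, an inactive root whose hot team already
// holds 4 threads, and set_nthreads == 8, the guard fires because
// 6 + 8 - 4 = 10 > 8, and
//   tl_nthreads = 8 - 6 + 4 = 6,
// so the reservation is trimmed to 6 threads (with a one-time
// CantFormThrTeam warning if dyn-var is false).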

/* Allocate threads from the thread pool and assign them to the new team. We are
   assured that there are enough threads available, because we checked on that
   earlier within the forkjoin critical section. */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }; // for b
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. We try to avoid unnecessary writes to the relevant cache line in the
// team structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have. So, this code achieves what we need
    // whether or not t_fp_control_saved is true. By checking whether the value
    // needs updating we avoid unnecessary writes that would put the cache-line
    // into a written state, causing all threads in the team to have to read it
    // again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}
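
// KMP_CHECK_UPDATE is the usual "write only if different" idiom; roughly (an
// assumption about its definition in kmp.h, not verified here):
//
//   #define KMP_CHECK_UPDATE(lhs, rhs) if ((lhs) != (rhs)) (lhs) = (rhs)
//
// which keeps the team's cache line in shared state when nothing changed.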

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team by
    // the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
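
// How the pair is meant to be used around fork/join (hedged sketch):
//
//   propagateFPControl(team);  // at fork: record master's x87/MXCSR in team
//   ... team executes; a thread's registers may drift from the team's ...
//   updateHWFPControl(team);   // reload the team's values if ours differ
//
// On non-x86 targets both calls compile away to ((void)0).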
1128
Jonathan Peyton30419822017-05-12 18:01:32 +00001129static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1130 int realloc); // forward declaration
Jim Cownie5e8470a2013-09-27 10:38:44 +00001131
Jonathan Peyton30419822017-05-12 18:01:32 +00001132/* Run a parallel region that has been serialized, so runs only in a team of the
1133 single master thread. */
1134void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1135 kmp_info_t *this_thr;
1136 kmp_team_t *serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001137
Jonathan Peyton30419822017-05-12 18:01:32 +00001138 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001139
Jonathan Peyton30419822017-05-12 18:01:32 +00001140 /* Skip all this code for autopar serialized loops since it results in
1141 unacceptable overhead */
1142 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1143 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001144
Jonathan Peyton30419822017-05-12 18:01:32 +00001145 if (!TCR_4(__kmp_init_parallel))
1146 __kmp_parallel_initialize();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001147
Jonathan Peyton30419822017-05-12 18:01:32 +00001148 this_thr = __kmp_threads[global_tid];
1149 serial_team = this_thr->th.th_serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001150
Jonathan Peyton30419822017-05-12 18:01:32 +00001151 /* utilize the serialized team held by this thread */
1152 KMP_DEBUG_ASSERT(serial_team);
1153 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001154
Jonathan Peyton30419822017-05-12 18:01:32 +00001155 if (__kmp_tasking_mode != tskm_immediate_exec) {
1156 KMP_DEBUG_ASSERT(
1157 this_thr->th.th_task_team ==
1158 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1159 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1160 NULL);
1161 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1162 "team %p, new task_team = NULL\n",
1163 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1164 this_thr->th.th_task_team = NULL;
1165 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001166
1167#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001168 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1169 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1170 proc_bind = proc_bind_false;
1171 } else if (proc_bind == proc_bind_default) {
1172 // No proc_bind clause was specified, so use the current value
1173 // of proc-bind-var for this parallel region.
1174 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1175 }
1176 // Reset for next parallel region
1177 this_thr->th.th_set_proc_bind = proc_bind_default;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001178#endif /* OMP_40_ENABLED */
1179
Jonathan Peyton30419822017-05-12 18:01:32 +00001180 if (this_thr->th.th_team != serial_team) {
1181 // Nested level will be an index in the nested nthreads array
1182 int level = this_thr->th.th_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001183
Jonathan Peyton30419822017-05-12 18:01:32 +00001184 if (serial_team->t.t_serialized) {
1185 /* this serial team was already used
1186 TODO: increase performance by making these locks more specific */
1187 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001188
Jonathan Peyton30419822017-05-12 18:01:32 +00001189 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001190
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001191#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001192 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001193#endif
1194
Jonathan Peyton30419822017-05-12 18:01:32 +00001195 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001196#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001197 ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001198#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001199#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001200 proc_bind,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001201#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001202 &this_thr->th.th_current_task->td_icvs,
1203 0 USE_NESTED_HOT_ARG(NULL));
1204 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1205 KMP_ASSERT(new_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001206
Jonathan Peyton30419822017-05-12 18:01:32 +00001207 /* setup new serialized team and install it */
1208 new_team->t.t_threads[0] = this_thr;
1209 new_team->t.t_parent = this_thr->th.th_team;
1210 serial_team = new_team;
1211 this_thr->th.th_serial_team = serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001212
Jonathan Peyton30419822017-05-12 18:01:32 +00001213 KF_TRACE(
1214 10,
1215 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1216 global_tid, serial_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001217
Jonathan Peyton30419822017-05-12 18:01:32 +00001218 /* TODO: the above breaks the guarantee that serialized teams still work
1219 if we run out of resources, since we may need to allocate a new team
1220 here */
1221 } else {
1222 KF_TRACE(
1223 10,
1224 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1225 global_tid, serial_team));
1226 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001227
Jonathan Peyton30419822017-05-12 18:01:32 +00001228 /* we have to initialize this serial team */
1229 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1230 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1231 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1232 serial_team->t.t_ident = loc;
1233 serial_team->t.t_serialized = 1;
1234 serial_team->t.t_nproc = 1;
1235 serial_team->t.t_parent = this_thr->th.th_team;
1236 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
1237 this_thr->th.th_team = serial_team;
1238 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001239
Jonathan Peyton30419822017-05-12 18:01:32 +00001240 KF_TRACE(10, ("__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid,
1241 this_thr->th.th_current_task));
1242 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1243 this_thr->th.th_current_task->td_flags.executing = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001244
Jonathan Peyton30419822017-05-12 18:01:32 +00001245 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001246
Jonathan Peyton30419822017-05-12 18:01:32 +00001247 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1248 implicit task for each serialized task represented by
1249 team->t.t_serialized? */
1250 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1251 &this_thr->th.th_current_task->td_parent->td_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001252
Jonathan Peyton30419822017-05-12 18:01:32 +00001253 // Thread value exists in the nested nthreads array for the next nested
1254 // level
1255 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1256 this_thr->th.th_current_task->td_icvs.nproc =
1257 __kmp_nested_nth.nth[level + 1];
1258 }
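// Illustrative example (assumed environment): OMP_NUM_THREADS=4,2 makes
// __kmp_nested_nth.nth == {4, 2}; a serialized region at level 0 installs
// nproc == 2 here, so a parallel region nested inside it asks for 2 threads.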
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001259
1260#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001261 if (__kmp_nested_proc_bind.used &&
1262 (level + 1 < __kmp_nested_proc_bind.used)) {
1263 this_thr->th.th_current_task->td_icvs.proc_bind =
1264 __kmp_nested_proc_bind.bind_types[level + 1];
1265 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001266#endif /* OMP_40_ENABLED */
1267
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001268#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00001269 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001270#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001271 this_thr->th.th_info.ds.ds_tid = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001272
Jonathan Peyton30419822017-05-12 18:01:32 +00001273 /* set thread cache values */
1274 this_thr->th.th_team_nproc = 1;
1275 this_thr->th.th_team_master = this_thr;
1276 this_thr->th.th_team_serialized = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001277
Jonathan Peyton30419822017-05-12 18:01:32 +00001278 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1279 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001280
Jonathan Peyton30419822017-05-12 18:01:32 +00001281 propagateFPControl(serial_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001282
Jonathan Peyton30419822017-05-12 18:01:32 +00001283 /* check if we need to allocate dispatch buffers stack */
1284 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1285 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1286 serial_team->t.t_dispatch->th_disp_buffer =
1287 (dispatch_private_info_t *)__kmp_allocate(
1288 sizeof(dispatch_private_info_t));
1289 }
1290 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001291
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001292#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001293 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1294 __ompt_team_assign_id(serial_team, ompt_parallel_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001295#endif
1296
Jonathan Peyton30419822017-05-12 18:01:32 +00001297 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001298
Jonathan Peyton30419822017-05-12 18:01:32 +00001299 } else {
1300 /* this serialized team is already being used,
1301 * that's fine, just add another nested level */
1302 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1303 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1304 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1305 ++serial_team->t.t_serialized;
1306 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001307
Jonathan Peyton30419822017-05-12 18:01:32 +00001308 // Nested level will be an index in the nested nthreads array
1309 int level = this_thr->th.th_team->t.t_level;
1310 // Thread value exists in the nested nthreads array for the next nested
1311 // level
1312 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1313 this_thr->th.th_current_task->td_icvs.nproc =
1314 __kmp_nested_nth.nth[level + 1];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001315 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001316 serial_team->t.t_level++;
1317 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1318 "of serial team %p to %d\n",
1319 global_tid, serial_team, serial_team->t.t_level));
1320
1321 /* allocate/push dispatch buffers stack */
1322 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1323 {
1324 dispatch_private_info_t *disp_buffer =
1325 (dispatch_private_info_t *)__kmp_allocate(
1326 sizeof(dispatch_private_info_t));
1327 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1328 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1329 }
1330 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
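// Illustrative note: the dispatch buffers form a per-team linked stack; each
// extra serialized nesting level pushes a fresh dispatch_private_info_t
// above, and the matching __kmpc_end_serialized_parallel is expected to pop
// and free it on exit.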
1331
1332 KMP_MB();
1333 }
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001334#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001335 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001336#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001337
Jonathan Peyton30419822017-05-12 18:01:32 +00001338 if (__kmp_env_consistency_check)
1339 __kmp_push_parallel(global_tid, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001340}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001341
Jim Cownie5e8470a2013-09-27 10:38:44 +00001342/* most of the work for a fork */
1343/* return true if we really went parallel, false if serialized */
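/* Caller-contract sketch (illustrative, inferred from the paths below): a
   FALSE return means the region was serialized -- the Intel entry points have
   already run the microtask inline by then, while a GNU (fork_context_gnu)
   caller must still invoke the outlined function itself. */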
Jonathan Peyton30419822017-05-12 18:01:32 +00001344int __kmp_fork_call(ident_t *loc, int gtid,
1345 enum fork_context_e call_context, // Intel, GNU, ...
1346 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001347#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001348 void *unwrapped_task,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001349#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001350 microtask_t microtask, launch_t invoker,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001351/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001352#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001353 va_list *ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001354#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001355 va_list ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001356#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001357 ) {
1358 void **argv;
1359 int i;
1360 int master_tid;
1361 int master_this_cons;
1362 kmp_team_t *team;
1363 kmp_team_t *parent_team;
1364 kmp_info_t *master_th;
1365 kmp_root_t *root;
1366 int nthreads;
1367 int master_active;
1368 int master_set_numthreads;
1369 int level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001370#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001371 int active_level;
1372 int teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001373#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001374#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001375 kmp_hot_team_ptr_t **p_hot_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001376#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001377 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001378 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001379 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001380
Jonathan Peyton30419822017-05-12 18:01:32 +00001381 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1382 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1383 /* Some systems prefer the stack for the root thread(s) to start with */
1384 /* some gap from the parent stack to prevent false sharing. */
1385 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1386 /* These 2 lines below are so this does not get optimized out */
1387 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1388 __kmp_stkpadding += (short)((kmp_int64)dummy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001389 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001390
1391 /* initialize if needed */
Jonathan Peyton30419822017-05-12 18:01:32 +00001392 KMP_DEBUG_ASSERT(
1393 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1394 if (!TCR_4(__kmp_init_parallel))
1395 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001396
1397 /* setup current data */
Jonathan Peyton30419822017-05-12 18:01:32 +00001398 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
1399 // shutdown
1400 parent_team = master_th->th.th_team;
1401 master_tid = master_th->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001402 master_this_cons = master_th->th.th_local.this_construct;
Jonathan Peyton30419822017-05-12 18:01:32 +00001403 root = master_th->th.th_root;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001404 master_active = root->r.r_active;
1405 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001406
1407#if OMPT_SUPPORT
1408 ompt_parallel_id_t ompt_parallel_id;
1409 ompt_task_id_t ompt_task_id;
1410 ompt_frame_t *ompt_frame;
1411 ompt_task_id_t my_task_id;
1412 ompt_parallel_id_t my_parallel_id;
1413
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001414 if (ompt_enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001415 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1416 ompt_task_id = __ompt_get_task_id_internal(0);
1417 ompt_frame = __ompt_get_task_frame_internal(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001418 }
1419#endif
1420
Jim Cownie5e8470a2013-09-27 10:38:44 +00001421 // Nested level will be an index in the nested nthreads array
Jonathan Peyton30419822017-05-12 18:01:32 +00001422 level = parent_team->t.t_level;
1423 // used to launch non-serial teams even if nested is not allowed
1424 active_level = parent_team->t.t_active_level;
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001425#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00001426 // needed to check nesting inside the teams
1427 teams_level = master_th->th.th_teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001428#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001429#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001430 p_hot_teams = &master_th->th.th_hot_teams;
1431 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1432 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1433 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1434 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
Jonathan Peyton642688b2017-06-01 16:46:36 +00001435 // it is either actual or not needed (when active_level > 0)
1436 (*p_hot_teams)[0].hot_team_nth = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001437 }
1438#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001439
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001440#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001441 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001442 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001443 int team_size = master_set_numthreads;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001444
Jonathan Peyton30419822017-05-12 18:01:32 +00001445 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1446 ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
1447 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001448 }
1449#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001450
Jim Cownie5e8470a2013-09-27 10:38:44 +00001451 master_th->th.th_ident = loc;
1452
1453#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001454 if (master_th->th.th_teams_microtask && ap &&
1455 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1456 // AC: This is start of parallel that is nested inside teams construct.
1457 // The team is actual (hot), all workers are ready at the fork barrier.
1458 // No lock needed to initialize the team a bit, then free workers.
1459 parent_team->t.t_ident = loc;
1460 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1461 parent_team->t.t_argc = argc;
1462 argv = (void **)parent_team->t.t_argv;
1463 for (i = argc - 1; i >= 0; --i)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001464/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001465#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001466 *argv++ = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001467#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001468 *argv++ = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001469#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001470 // Increment our nested depth levels, but do not increase the serialization
1471 if (parent_team == master_th->th.th_serial_team) {
1472 // AC: we are in serialized parallel
1473 __kmpc_serialized_parallel(loc, gtid);
1474 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1475 // AC: need this so that enquiry functions work
1476 // correctly; will restore at join time
1477 parent_team->t.t_serialized--;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001478#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001479 void *dummy;
1480 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001481
Jonathan Peyton30419822017-05-12 18:01:32 +00001482 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001483
Jonathan Peyton30419822017-05-12 18:01:32 +00001484 if (ompt_enabled) {
1485 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
1486 ompt_parallel_id);
1487 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1488 exit_runtime_p =
1489 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001490
Jonathan Peyton30419822017-05-12 18:01:32 +00001491 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001492
1493#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001494 /* OMPT implicit task begin */
1495 my_task_id = lw_taskteam.ompt_task_info.task_id;
1496 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
1497 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1498 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1499 my_parallel_id, my_task_id);
1500 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001501#endif
1502
Jonathan Peyton30419822017-05-12 18:01:32 +00001503 /* OMPT state */
1504 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1505 } else {
1506 exit_runtime_p = &dummy;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001507 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001508#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001509
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001510 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001511 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1512 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1513 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1514#if OMPT_SUPPORT
1515 ,
1516 exit_runtime_p
1517#endif
1518 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001519 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001520
Jonathan Peyton30419822017-05-12 18:01:32 +00001521#if OMPT_SUPPORT
1522 *exit_runtime_p = NULL;
1523 if (ompt_enabled) {
1524#if OMPT_TRACE
1525 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001526
Jonathan Peyton30419822017-05-12 18:01:32 +00001527 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1528 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1529 ompt_parallel_id, ompt_task_id);
1530 }
1531
1532 __ompt_lw_taskteam_unlink(master_th);
1533 // reset/clear the task id only after unlinking the task
1534 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1535#endif
1536
1537 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1538 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1539 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
1540 }
1541 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1542 }
1543#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001544 return TRUE;
Jonathan Peyton30419822017-05-12 18:01:32 +00001545 }
1546
1547 parent_team->t.t_pkfn = microtask;
1548#if OMPT_SUPPORT
1549 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1550#endif
1551 parent_team->t.t_invoke = invoker;
1552 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
1553 parent_team->t.t_active_level++;
1554 parent_team->t.t_level++;
1555
1556 /* Change number of threads in the team if requested */
1557 if (master_set_numthreads) { // The parallel has num_threads clause
1558 if (master_set_numthreads < master_th->th.th_teams_size.nth) {
1559 // AC: only can reduce number of threads dynamically, can't increase
1560 kmp_info_t **other_threads = parent_team->t.t_threads;
1561 parent_team->t.t_nproc = master_set_numthreads;
1562 for (i = 0; i < master_set_numthreads; ++i) {
1563 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1564 }
1565 // Keep extra threads hot in the team for possible next parallels
1566 }
1567 master_th->th.th_set_nproc = 0;
1568 }
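// Illustrative example (hypothetical sizes): if the teams construct created
// 8 threads per team, a nested "#pragma omp parallel num_threads(4)" trims
// the hot team to 4 here; num_threads(16) would be ignored because the team
// can only shrink, never grow, on this path.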
1569
1570#if USE_DEBUGGER
1571 if (__kmp_debugging) { // Let debugger override number of threads.
1572 int nth = __kmp_omp_num_threads(loc);
Jonathan Peyton642688b2017-06-01 16:46:36 +00001573 if (nth > 0) { // 0 means debugger doesn't want to change num threads
Jonathan Peyton30419822017-05-12 18:01:32 +00001574 master_set_numthreads = nth;
1575 } // if
1576 } // if
1577#endif
1578
1579 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
1580 "master_th=%p, gtid=%d\n",
1581 root, parent_team, master_th, gtid));
1582 __kmp_internal_fork(loc, gtid, parent_team);
1583 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
1584 "master_th=%p, gtid=%d\n",
1585 root, parent_team, master_th, gtid));
1586
1587 /* Invoke microtask for MASTER thread */
1588 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
1589 parent_team->t.t_id, parent_team->t.t_pkfn));
1590
1591 {
1592 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1593 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1594 if (!parent_team->t.t_invoke(gtid)) {
1595 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
1596 }
1597 }
1598 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
1599 parent_team->t.t_id, parent_team->t.t_pkfn));
1600 KMP_MB(); /* Flush all pending memory write invalidates. */
1601
1602 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
1603
1604 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001605 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001606#endif /* OMP_40_ENABLED */
1607
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001608#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001609 if (__kmp_tasking_mode != tskm_immediate_exec) {
1610 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1611 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001612 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001613#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001614
Jonathan Peyton30419822017-05-12 18:01:32 +00001615 if (parent_team->t.t_active_level >=
1616 master_th->th.th_current_task->td_icvs.max_active_levels) {
1617 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001618 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001619#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001620 int enter_teams = ((ap == NULL && active_level == 0) ||
1621 (ap && teams_level > 0 && teams_level == level));
Andrey Churbanov92effc42015-08-18 10:08:27 +00001622#endif
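// Illustrative reading of enter_teams: it appears true either when this fork
// creates the teams construct itself (ap == NULL at top level) or when a
// parallel is nested directly inside teams (teams_level == level).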
Jonathan Peyton30419822017-05-12 18:01:32 +00001623 nthreads =
1624 master_set_numthreads
1625 ? master_set_numthreads
1626 : get__nproc_2(
1627 parent_team,
1628 master_tid); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001629
Jonathan Peyton30419822017-05-12 18:01:32 +00001630 // Check if we need to take forkjoin lock? (no need for serialized
1631 // parallel out of teams construct). This code moved here from
1632 // __kmp_reserve_threads() to speedup nested serialized parallels.
1633 if (nthreads > 1) {
1634 if ((!get__nested(master_th) && (root->r.r_in_parallel
Andrey Churbanov92effc42015-08-18 10:08:27 +00001635#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001636 && !enter_teams
Andrey Churbanov92effc42015-08-18 10:08:27 +00001637#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001638 )) ||
1639 (__kmp_library == library_serial)) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001640 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
1641 " threads\n",
1642 gtid, nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +00001643 nthreads = 1;
Andrey Churbanov92effc42015-08-18 10:08:27 +00001644 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001645 }
1646 if (nthreads > 1) {
1647 /* determine how many new threads we can use */
1648 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jonathan Peyton30419822017-05-12 18:01:32 +00001649 nthreads = __kmp_reserve_threads(
1650 root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001651#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001652 /* AC: If we execute teams from parallel region (on host), then
1653 teams should be created but each can only have 1 thread if
1654 nesting is disabled. If teams called from serial region, then
1655 teams and their threads should be created regardless of the
1656 nesting setting. */
1657 ,
1658 enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001659#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001660 );
1661 if (nthreads == 1) {
1662 // Free lock for single thread execution here; for multi-thread
1663 // execution it will be freed later after team of threads created
1664 // and initialized
1665 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Andrey Churbanov92effc42015-08-18 10:08:27 +00001666 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001667 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001668 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001669 KMP_DEBUG_ASSERT(nthreads > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670
Jonathan Peyton30419822017-05-12 18:01:32 +00001671 // If we temporarily changed the set number of threads then restore it now
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001672 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001673
Jim Cownie5e8470a2013-09-27 10:38:44 +00001674 /* create a serialized parallel region? */
Jonathan Peyton30419822017-05-12 18:01:32 +00001675 if (nthreads == 1) {
1676/* josh todo: hypothetical question: what do we do for OS X*? */
1677#if KMP_OS_LINUX && \
1678 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1679 void *args[argc];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001680#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001681 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1682#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1683 KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001684
Jonathan Peyton30419822017-05-12 18:01:32 +00001685 KA_TRACE(20,
1686 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001687
Jonathan Peyton30419822017-05-12 18:01:32 +00001688 __kmpc_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001689
Jonathan Peyton30419822017-05-12 18:01:32 +00001690 if (call_context == fork_context_intel) {
1691 /* TODO this sucks, use the compiler itself to pass args! :) */
1692 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001693#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001694 if (!ap) {
1695 // revert change made in __kmpc_serialized_parallel()
1696 master_th->th.th_serial_team->t.t_level--;
1697// Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001698
1699#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001700 void *dummy;
1701 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001702
Jonathan Peyton30419822017-05-12 18:01:32 +00001703 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001704
Jonathan Peyton30419822017-05-12 18:01:32 +00001705 if (ompt_enabled) {
1706 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1707 unwrapped_task, ompt_parallel_id);
1708 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1709 exit_runtime_p =
1710 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001711
Jonathan Peyton30419822017-05-12 18:01:32 +00001712 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001713
1714#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001715 my_task_id = lw_taskteam.ompt_task_info.task_id;
1716 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1717 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1718 ompt_parallel_id, my_task_id);
1719 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001720#endif
1721
Jonathan Peyton30419822017-05-12 18:01:32 +00001722 /* OMPT state */
1723 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1724 } else {
1725 exit_runtime_p = &dummy;
1726 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001727#endif
1728
Jonathan Peyton30419822017-05-12 18:01:32 +00001729 {
1730 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1731 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1732 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1733 parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001734#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001735 ,
1736 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001737#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001738 );
1739 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001740
1741#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001742 *exit_runtime_p = NULL;
1743 if (ompt_enabled) {
1744 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001745
1746#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001747 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1748 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1749 ompt_parallel_id, ompt_task_id);
1750 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001751#endif
1752
Jonathan Peyton30419822017-05-12 18:01:32 +00001753 __ompt_lw_taskteam_unlink(master_th);
1754 // reset clear the task id only after unlinking the task
1755 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001756
Jonathan Peyton30419822017-05-12 18:01:32 +00001757 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1758 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1759 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
1760 }
1761 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1762 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001763#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001764 } else if (microtask == (microtask_t)__kmp_teams_master) {
1765 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1766 master_th->th.th_serial_team);
1767 team = master_th->th.th_team;
1768 // team->t.t_pkfn = microtask;
1769 team->t.t_invoke = invoker;
1770 __kmp_alloc_argv_entries(argc, team, TRUE);
1771 team->t.t_argc = argc;
1772 argv = (void **)team->t.t_argv;
1773 if (ap) {
1774 for (i = argc - 1; i >= 0; --i)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001775// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001776#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001777 *argv++ = va_arg(*ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001778#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001779 *argv++ = va_arg(ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001780#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001781 } else {
1782 for (i = 0; i < argc; ++i)
1783 // Get args from parent team for teams construct
1784 argv[i] = parent_team->t.t_argv[i];
1785 }
1786 // AC: revert change made in __kmpc_serialized_parallel()
1787 // because initial code in teams should have level=0
1788 team->t.t_level--;
1789 // AC: call special invoker for outer "parallel" of teams construct
1790 {
1791 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1792 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1793 invoker(gtid);
1794 }
1795 } else {
1796#endif /* OMP_40_ENABLED */
1797 argv = args;
1798 for (i = argc - 1; i >= 0; --i)
1799// TODO: revert workaround for Intel(R) 64 tracker #96
1800#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1801 *argv++ = va_arg(*ap, void *);
1802#else
1803 *argv++ = va_arg(ap, void *);
1804#endif
1805 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001806
1807#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001808 void *dummy;
1809 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001810
Jonathan Peyton30419822017-05-12 18:01:32 +00001811 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001812
Jonathan Peyton30419822017-05-12 18:01:32 +00001813 if (ompt_enabled) {
1814 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1815 unwrapped_task, ompt_parallel_id);
1816 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1817 exit_runtime_p =
1818 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001819
Jonathan Peyton30419822017-05-12 18:01:32 +00001820 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001821
1822#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001823 /* OMPT implicit task begin */
1824 my_task_id = lw_taskteam.ompt_task_info.task_id;
1825 my_parallel_id = ompt_parallel_id;
1826 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1827 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1828 my_parallel_id, my_task_id);
1829 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001830#endif
1831
Jonathan Peyton30419822017-05-12 18:01:32 +00001832 /* OMPT state */
1833 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1834 } else {
1835 exit_runtime_p = &dummy;
1836 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001837#endif
1838
Jonathan Peyton30419822017-05-12 18:01:32 +00001839 {
1840 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1841 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1842 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001843#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001844 ,
1845 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001846#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001847 );
1848 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001849
1850#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001851 *exit_runtime_p = NULL;
1852 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001853#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001854 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001855
Jonathan Peyton30419822017-05-12 18:01:32 +00001856 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1857 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1858 my_parallel_id, my_task_id);
1859 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001860#endif
1861
Jonathan Peyton30419822017-05-12 18:01:32 +00001862 __ompt_lw_taskteam_unlink(master_th);
1863 // reset/clear the task id only after unlinking the task
1864 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001865
Jonathan Peyton30419822017-05-12 18:01:32 +00001866 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1867 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1868 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
1869 }
1870 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1871 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001872#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001873#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001874 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001875#endif /* OMP_40_ENABLED */
1876 } else if (call_context == fork_context_gnu) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001877#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001878 ompt_lw_taskteam_t *lwt =
1879 (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
1880 __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task,
1881 ompt_parallel_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001882
Jonathan Peyton30419822017-05-12 18:01:32 +00001883 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1884 lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
1885 __ompt_lw_taskteam_link(lwt, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001886#endif
1887
Jonathan Peyton30419822017-05-12 18:01:32 +00001888 // we were called from GNU native code
1889 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001890 return FALSE;
Jonathan Peyton30419822017-05-12 18:01:32 +00001891 } else {
1893 KMP_ASSERT2(call_context < fork_context_last,
1894 "__kmp_fork_call: unknown fork_context parameter");
1895 }
1896
1897 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
1898 KMP_MB();
1899 return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900 }
1901
Jim Cownie5e8470a2013-09-27 10:38:44 +00001902 // GEH: only modify the executing flag in the case when not serialized
1903 // serialized case is handled in kmpc_serialized_parallel
Jonathan Peyton30419822017-05-12 18:01:32 +00001904 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
1905 "curtask=%p, curtask_max_aclevel=%d\n",
1906 parent_team->t.t_active_level, master_th,
1907 master_th->th.th_current_task,
1908 master_th->th.th_current_task->td_icvs.max_active_levels));
1909 // TODO: GEH - cannot do this assertion because root thread not set up as
1910 // executing
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1912 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001913
1914#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001915 if (!master_th->th.th_teams_microtask || level > teams_level)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001916#endif /* OMP_40_ENABLED */
1917 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001918 /* Increment our nested depth level */
1919 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001920 }
1921
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001923 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001924 if ((level + 1 < __kmp_nested_nth.used) &&
1925 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
1926 nthreads_icv = __kmp_nested_nth.nth[level + 1];
1927 } else {
1928 nthreads_icv = 0; // don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001929 }
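// Illustrative example (assumed environment): with OMP_NUM_THREADS=8,4,2 a
// fork at level 1 finds nth[2] == 2; if the inherited nproc ICV is already 2,
// nthreads_icv stays 0 and the child team can reuse the parent ICVs below
// without a fresh copy.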
1930
1931#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jonathan Peyton30419822017-05-12 18:01:32 +00001934 kmp_proc_bind_t proc_bind_icv =
1935 proc_bind_default; // proc_bind_default means don't update
1936 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1937 proc_bind = proc_bind_false;
1938 } else {
1939 if (proc_bind == proc_bind_default) {
1940 // No proc_bind clause specified; use current proc-bind-var for this
1941 // parallel region
1942 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1943 }
1944 /* else: The proc_bind policy was specified explicitly on the parallel
1945 clause. This overrides proc-bind-var for this parallel region, but does
1946 not change proc-bind-var. */
1947 // Figure the value of proc-bind-var for the child threads.
1948 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1949 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1950 master_th->th.th_current_task->td_icvs.proc_bind)) {
1951 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1952 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001953 }
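// Illustrative example (assumed environment): OMP_PROC_BIND=spread,close
// yields bind_types {spread, close}; a top-level fork with no clause keeps
// proc_bind == spread for this region and sets proc_bind_icv to close so the
// children inherit close as their proc-bind-var.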
1954
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001956 master_th->th.th_set_proc_bind = proc_bind_default;
1957#endif /* OMP_40_ENABLED */
1958
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001959 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001960#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001961 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001963 ) {
1964 kmp_internal_control_t new_icvs;
1965 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
1966 new_icvs.next = NULL;
1967 if (nthreads_icv > 0) {
1968 new_icvs.nproc = nthreads_icv;
1969 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001970
1971#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001972 if (proc_bind_icv != proc_bind_default) {
1973 new_icvs.proc_bind = proc_bind_icv;
1974 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001975#endif /* OMP_40_ENABLED */
1976
Jonathan Peyton30419822017-05-12 18:01:32 +00001977 /* allocate a new parallel team */
1978 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
1979 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001980#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001981 ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001982#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001983#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001984 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001986 &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001987 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001988 /* allocate a new parallel team */
1989 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
1990 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001991#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001992 ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001993#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001995 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001996#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001997 &master_th->th.th_current_task->td_icvs,
1998 argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002000 KF_TRACE(
2001 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002
2003 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002004 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2005 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2006 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2007 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2008 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002009#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002010 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002011#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002012 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2013// TODO: parent_team->t.t_level == INT_MAX ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00002014#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002015 if (!master_th->th.th_teams_microtask || level > teams_level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002016#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002017 int new_level = parent_team->t.t_level + 1;
2018 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2019 new_level = parent_team->t.t_active_level + 1;
2020 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021#if OMP_40_ENABLED
2022 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002023 // AC: Do not increase parallel level at start of the teams construct
2024 int new_level = parent_team->t.t_level;
2025 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2026 new_level = parent_team->t.t_active_level;
2027 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002028 }
2029#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002030 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton30419822017-05-12 18:01:32 +00002031 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
2032 team->t.t_sched.chunk != new_sched.chunk)
2033 team->t.t_sched =
2034 new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002035
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002036#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002037 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002038#endif
2039
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002040 // Update the floating point rounding in the team if required.
2041 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042
Jonathan Peyton30419822017-05-12 18:01:32 +00002043 if (__kmp_tasking_mode != tskm_immediate_exec) {
2044 // Set master's task team to team's task team. Unless this is a hot
2045 // team, it should be NULL.
Jonathan Peyton30419822017-05-12 18:01:32 +00002046 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2047 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peyton30419822017-05-12 18:01:32 +00002048 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
2049 "%p, new task_team %p / team %p\n",
2050 __kmp_gtid_from_thread(master_th),
2051 master_th->th.th_task_team, parent_team,
2052 team->t.t_task_team[master_th->th.th_task_state], team));
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002053
Jonathan Peyton30419822017-05-12 18:01:32 +00002054 if (active_level || master_th->th.th_task_team) {
2055 // Take a memo of master's task_state
2056 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2057 if (master_th->th.th_task_state_top >=
2058 master_th->th.th_task_state_stack_sz) { // increase size
2059 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2060 kmp_uint8 *old_stack, *new_stack;
2061 kmp_uint32 i;
2062 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2063 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2064 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2065 }
2066 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2067 ++i) { // zero-init rest of stack
2068 new_stack[i] = 0;
2069 }
2070 old_stack = master_th->th.th_task_state_memo_stack;
2071 master_th->th.th_task_state_memo_stack = new_stack;
2072 master_th->th.th_task_state_stack_sz = new_size;
2073 __kmp_free(old_stack);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002074 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002075 // Store master's task_state on stack
2076 master_th->th
2077 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2078 master_th->th.th_task_state;
2079 master_th->th.th_task_state_top++;
2080#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton642688b2017-06-01 16:46:36 +00002081 if (team == master_th->th.th_hot_teams[active_level].hot_team) {
2082 // Restore master's nested state if nested hot team
Jonathan Peyton30419822017-05-12 18:01:32 +00002083 master_th->th.th_task_state =
2084 master_th->th
2085 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2086 } else {
2087#endif
2088 master_th->th.th_task_state = 0;
2089#if KMP_NESTED_HOT_TEAMS
2090 }
2091#endif
2092 }
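// Illustrative note: th_task_state_memo_stack is a manually grown stack (the
// doubling reallocation above); the task_state pushed here is expected to be
// popped again in __kmp_join_call when this region ends.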
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002093#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002094 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2095 (team == root->r.r_hot_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002096#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002097 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002098
Jonathan Peyton30419822017-05-12 18:01:32 +00002099 KA_TRACE(
2100 20,
2101 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2102 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2103 team->t.t_nproc));
2104 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2105 (team->t.t_master_tid == 0 &&
2106 (team->t.t_parent == root->r.r_root_team ||
2107 team->t.t_parent->t.t_serialized)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002108 KMP_MB();
2109
2110 /* now, setup the arguments */
Jonathan Peyton30419822017-05-12 18:01:32 +00002111 argv = (void **)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002112#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002113 if (ap) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002115 for (i = argc - 1; i >= 0; --i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002116// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002117#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00002118 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002119#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002120 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002121#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002122 KMP_CHECK_UPDATE(*argv, new_argv);
2123 argv++;
2124 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002125#if OMP_40_ENABLED
2126 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002127 for (i = 0; i < argc; ++i) {
2128 // Get args from parent team for teams construct
2129 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2130 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002131 }
2132#endif /* OMP_40_ENABLED */
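// Illustrative note: KMP_CHECK_UPDATE stores only when the value actually
// changed (if (a != b) a = b), so re-forking a hot team with identical
// arguments avoids dirtying cache lines shared with running workers.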
2133
2134 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002135 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002136 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
Jonathan Peyton30419822017-05-12 18:01:32 +00002137 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002138
Jonathan Peyton30419822017-05-12 18:01:32 +00002139 __kmp_fork_team_threads(root, team, master_th, gtid);
2140 __kmp_setup_icv_copy(team, nthreads,
2141 &master_th->th.th_current_task->td_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002142
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002143#if OMPT_SUPPORT
2144 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2145#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002146
Jonathan Peyton30419822017-05-12 18:01:32 +00002147 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002148
Jim Cownie5e8470a2013-09-27 10:38:44 +00002149#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002150 if (team->t.t_active_level == 1 // only report frames at level 1
2151#if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002152 && !master_th->th.th_teams_microtask // not in teams construct
Jonathan Peyton30419822017-05-12 18:01:32 +00002153#endif /* OMP_40_ENABLED */
2154 ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002155#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002156 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2157 (__kmp_forkjoin_frames_mode == 3 ||
2158 __kmp_forkjoin_frames_mode == 1)) {
2159 kmp_uint64 tmp_time = 0;
2160 if (__itt_get_timestamp_ptr)
2161 tmp_time = __itt_get_timestamp();
2162 // Internal fork - report frame begin
2163 master_th->th.th_frame_time = tmp_time;
2164 if (__kmp_forkjoin_frames_mode == 3)
2165 team->t.t_region_time = tmp_time;
Jonathan Peyton642688b2017-06-01 16:46:36 +00002166 } else
2167// only one notification scheme (either "submit" or "forking/joined", not both)
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002168#endif /* USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00002169 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2170 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2171 // Mark start of "parallel" region for VTune.
2172 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2173 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002174 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002175#endif /* USE_ITT_BUILD */
2176
2177 /* now go on and do the work */
Jonathan Peyton30419822017-05-12 18:01:32 +00002178 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002179 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00002180 KF_TRACE(10,
2181 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2182 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002183
2184#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002185 if (__itt_stack_caller_create_ptr) {
2186 team->t.t_stack_id =
2187 __kmp_itt_stack_caller_create(); // create new stack stitching id
2188 // before entering fork barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002189 }
2190#endif /* USE_ITT_BUILD */
2191
2192#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00002193 // AC: skip __kmp_internal_fork at teams construct, let only master
2194 // threads execute
2195 if (ap)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002196#endif /* OMP_40_ENABLED */
2197 {
Jonathan Peyton30419822017-05-12 18:01:32 +00002198 __kmp_internal_fork(loc, gtid, team);
2199 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2200 "master_th=%p, gtid=%d\n",
2201 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002202 }
2203
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002204 if (call_context == fork_context_gnu) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002205 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2206 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002207 }
2208
2209 /* Invoke microtask for MASTER thread */
Jonathan Peyton30419822017-05-12 18:01:32 +00002210 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2211 team->t.t_id, team->t.t_pkfn));
2212 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002213
Jonathan Peyton30419822017-05-12 18:01:32 +00002214 {
2215 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2216 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
2217 if (!team->t.t_invoke(gtid)) {
2218 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jim Cownie5e8470a2013-09-27 10:38:44 +00002219 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002220 }
2221 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2222 team->t.t_id, team->t.t_pkfn));
2223 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002224
Jonathan Peyton30419822017-05-12 18:01:32 +00002225 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002226
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002227#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002228 if (ompt_enabled) {
2229 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2230 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002231#endif
2232
Jonathan Peyton30419822017-05-12 18:01:32 +00002233 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002234}
2235
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002236#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002237static inline void __kmp_join_restore_state(kmp_info_t *thread,
2238 kmp_team_t *team) {
2239 // restore state outside the region
2240 thread->th.ompt_thread_info.state =
2241 ((team->t.t_serialized) ? ompt_state_work_serial
2242 : ompt_state_work_parallel);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002243}
2244
Jonathan Peyton30419822017-05-12 18:01:32 +00002245static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
2246 ompt_parallel_id_t parallel_id,
2247 fork_context_e fork_context) {
2248 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2249 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
2250 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
2251 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
2252 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002253
Jonathan Peyton30419822017-05-12 18:01:32 +00002254 task_info->frame.reenter_runtime_frame = NULL;
2255 __kmp_join_restore_state(thread, team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002256}
2257#endif
2258
Jonathan Peyton30419822017-05-12 18:01:32 +00002259void __kmp_join_call(ident_t *loc, int gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002260#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002261 ,
2262 enum fork_context_e fork_context
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002263#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002264#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002265 ,
2266 int exit_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00002267#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002268 ) {
2269 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2270 kmp_team_t *team;
2271 kmp_team_t *parent_team;
2272 kmp_info_t *master_th;
2273 kmp_root_t *root;
2274 int master_active;
2275 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002276
Jonathan Peyton30419822017-05-12 18:01:32 +00002277 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002278
Jonathan Peyton30419822017-05-12 18:01:32 +00002279 /* setup current data */
2280 master_th = __kmp_threads[gtid];
2281 root = master_th->th.th_root;
2282 team = master_th->th.th_team;
2283 parent_team = team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002284
Jonathan Peyton30419822017-05-12 18:01:32 +00002285 master_th->th.th_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002286
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002287#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002288 if (ompt_enabled) {
2289 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2290 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002291#endif
2292
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002293#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00002294 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2295 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2296 "th_task_team = %p\n",
2297 __kmp_gtid_from_thread(master_th), team,
2298 team->t.t_task_team[master_th->th.th_task_state],
2299 master_th->th.th_task_team));
2300 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2301 team->t.t_task_team[master_th->th.th_task_state]);
2302 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002303#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002304
Jonathan Peyton30419822017-05-12 18:01:32 +00002305 if (team->t.t_serialized) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002306#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002307 if (master_th->th.th_teams_microtask) {
2308 // We are in teams construct
2309 int level = team->t.t_level;
2310 int tlevel = master_th->th.th_teams_level;
2311 if (level == tlevel) {
2312 // AC: we haven't incremented it earlier at the start of the teams
2313 // construct, so do it here, at the end of the teams construct
2314 team->t.t_level++;
2315 } else if (level == tlevel + 1) {
2316 // AC: we are exiting parallel inside teams, need to increment
2317 // serialization in order to restore it in the next call to
2318 // __kmpc_end_serialized_parallel
2319 team->t.t_serialized++;
2320 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002321 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002322#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002323 __kmpc_end_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002324
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002325#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002326 if (ompt_enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002327 __kmp_join_restore_state(master_th, parent_team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002328 }
2329#endif
2330
Jonathan Peyton30419822017-05-12 18:01:32 +00002331 return;
2332 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002333
Jonathan Peyton30419822017-05-12 18:01:32 +00002334 master_active = team->t.t_master_active;
2335
2336#if OMP_40_ENABLED
2337 if (!exit_teams)
2338#endif /* OMP_40_ENABLED */
2339 {
2340 // AC: No barrier for internal teams at exit from the teams construct,
2341 // but there is a barrier for the external team (the league).
2342 __kmp_internal_join(loc, gtid, team);
2343 }
2344#if OMP_40_ENABLED
2345 else {
2346 master_th->th.th_task_state =
2347 0; // AC: no tasking in teams (outside of any parallel region)
2348 }
2349#endif /* OMP_40_ENABLED */
2350
2351 KMP_MB();
2352
2353#if OMPT_SUPPORT
2354 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2355#endif
2356
2357#if USE_ITT_BUILD
2358 if (__itt_stack_caller_create_ptr) {
2359 __kmp_itt_stack_caller_destroy(
2360 (__itt_caller)team->t
2361 .t_stack_id); // destroy the stack stitching id after join barrier
2362 }
2363
2364 // Mark end of "parallel" region for VTune.
2365 if (team->t.t_active_level == 1
2366#if OMP_40_ENABLED
2367 && !master_th->th.th_teams_microtask /* not in teams construct */
2368#endif /* OMP_40_ENABLED */
2369 ) {
2370 master_th->th.th_ident = loc;
2371 // only one notification scheme (either "submit" or "forking/joined", not
2372 // both)
2373 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2374 __kmp_forkjoin_frames_mode == 3)
2375 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2376 master_th->th.th_frame_time, 0, loc,
2377 master_th->th.th_team_nproc, 1);
2378 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2379 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2380 __kmp_itt_region_joined(gtid);
2381 } // active_level == 1
2382#endif /* USE_ITT_BUILD */
2383
2384#if OMP_40_ENABLED
2385 if (master_th->th.th_teams_microtask && !exit_teams &&
2386 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2387 team->t.t_level == master_th->th.th_teams_level + 1) {
2388 // AC: We need to leave the team structure intact at the end of a parallel
2389 // region inside the teams construct, so that the same (hot) team works at
2390 // the next parallel region; only adjust the nesting levels here.
2391
2392 /* Decrement our nested depth level */
2393 team->t.t_level--;
2394 team->t.t_active_level--;
2395 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
2396
2397 /* Restore number of threads in the team if needed */
2398 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2399 int old_num = master_th->th.th_team_nproc;
2400 int new_num = master_th->th.th_teams_size.nth;
2401 kmp_info_t **other_threads = team->t.t_threads;
2402 team->t.t_nproc = new_num;
2403 for (i = 0; i < old_num; ++i) {
2404 other_threads[i]->th.th_team_nproc = new_num;
2405 }
2406 // Adjust the state of the team's previously unused threads
2407 for (i = old_num; i < new_num; ++i) {
2408 // Re-initialize thread's barrier data.
2409 int b;
2410 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2411 for (b = 0; b < bs_last_barrier; ++b) {
2412 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2413 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2414#if USE_DEBUGGER
2415 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2416#endif
2417 }
2418 if (__kmp_tasking_mode != tskm_immediate_exec) {
2419 // Synchronize thread's task state
2420 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2421 }
2422 }
2423 }
2424
2425#if OMPT_SUPPORT
2426 if (ompt_enabled) {
2427 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
2428 }
2429#endif
2430
2431 return;
2432 }
2433#endif /* OMP_40_ENABLED */
2434
2435 /* do cleanup and restore the parent team */
2436 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2437 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2438
2439 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2440
2441 /* jc: The following lock has instructions with REL and ACQ semantics,
2442 separating the parallel user code called in this parallel region
2443 from the serial user code called after this function returns. */
2444 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2445
2446#if OMP_40_ENABLED
2447 if (!master_th->th.th_teams_microtask ||
2448 team->t.t_level > master_th->th.th_teams_level)
2449#endif /* OMP_40_ENABLED */
2450 {
2451 /* Decrement our nested depth level */
2452 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
2453 }
2454 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2455
2456#if OMPT_SUPPORT && OMPT_TRACE
2457 if (ompt_enabled) {
2458 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2459 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2460 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2461 parallel_id, task_info->task_id);
2462 }
2463 task_info->frame.exit_runtime_frame = NULL;
2464 task_info->task_id = 0;
2465 }
2466#endif
2467
2468 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2469 master_th, team));
2470 __kmp_pop_current_task_from_thread(master_th);
2471
2472#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2473 // Restore master thread's partition.
2474 master_th->th.th_first_place = team->t.t_first_place;
2475 master_th->th.th_last_place = team->t.t_last_place;
2476#endif /* OMP_40_ENABLED */
2477
2478 updateHWFPControl(team);
2479
2480 if (root->r.r_active != master_active)
2481 root->r.r_active = master_active;
2482
2483 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2484 master_th)); // this will free worker threads
2485
2486 /* This race was fun to find. Make sure the following stays inside the
2487 critical region; otherwise assertions may fail occasionally, since the old
2488 team may be reallocated and the hierarchy then appears inconsistent. It is
2489 actually safe to run outside the lock and won't cause bugs, but it will
2490 trigger those assertion failures; it's only one deref & assign, so it might as well stay in the critical region. */
2491 master_th->th.th_team = parent_team;
2492 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2493 master_th->th.th_team_master = parent_team->t.t_threads[0];
2494 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2495
2496 /* restore serialized team, if need be */
2497 if (parent_team->t.t_serialized &&
2498 parent_team != master_th->th.th_serial_team &&
2499 parent_team != root->r.r_root_team) {
2500 __kmp_free_team(root,
2501 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2502 master_th->th.th_serial_team = parent_team;
2503 }
2504
2505 if (__kmp_tasking_mode != tskm_immediate_exec) {
2506 if (master_th->th.th_task_state_top >
2507 0) { // Restore task state from memo stack
2508 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2509 // Remember master's state if we re-use this nested hot team
2510 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2511 master_th->th.th_task_state;
2512 --master_th->th.th_task_state_top; // pop
2513 // Now restore state at this level
2514 master_th->th.th_task_state =
2515 master_th->th
2516 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2517 }
2518 // Copy the task team from the parent team to the master thread
2519 master_th->th.th_task_team =
2520 parent_team->t.t_task_team[master_th->th.th_task_state];
2521 KA_TRACE(20,
2522 ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2523 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2524 parent_team));
2525 }
2526
2527 // TODO: GEH - cannot do this assertion because the root thread is not set
2528 // up as executing
2529 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2530 master_th->th.th_current_task->td_flags.executing = 1;
2531
2532 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2533
2534#if OMPT_SUPPORT
2535 if (ompt_enabled) {
2536 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
2537 }
2538#endif
2539
2540 KMP_MB();
2541 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2542}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002543
2544/* Check whether we should push an internal control record onto the
2545 serial team stack. If so, do it. */
Jonathan Peyton30419822017-05-12 18:01:32 +00002546void __kmp_save_internal_controls(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002547
Jonathan Peyton30419822017-05-12 18:01:32 +00002548 if (thread->th.th_team != thread->th.th_serial_team) {
2549 return;
2550 }
2551 if (thread->th.th_team->t.t_serialized > 1) {
2552 int push = 0;
2553
2554 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2555 push = 1;
2556 } else {
2557 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2558 thread->th.th_team->t.t_serialized) {
2559 push = 1;
2560 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002561 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002562 if (push) { /* push a record on the serial team's stack */
2563 kmp_internal_control_t *control =
2564 (kmp_internal_control_t *)__kmp_allocate(
2565 sizeof(kmp_internal_control_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002566
Jonathan Peyton30419822017-05-12 18:01:32 +00002567 copy_icvs(control, &thread->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002568
Jonathan Peyton30419822017-05-12 18:01:32 +00002569 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002570
Jonathan Peyton30419822017-05-12 18:01:32 +00002571 control->next = thread->th.th_team->t.t_control_stack_top;
2572 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002573 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002574 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002575}
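/* Illustrative scenario for the push above (a sketch; it assumes nested
   serialized regions with nesting enabled):

     omp_set_nested(1);
     #pragma omp parallel num_threads(1) // serialized: t_serialized == 1
     #pragma omp parallel num_threads(1) // nested: t_serialized == 2
     omp_set_num_threads(8); // reaches here and pushes a record

   __kmpc_end_serialized_parallel can later pop that record and restore the
   enclosing level's ICVs. */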
2576
2577/* Changes set_nproc */
Jonathan Peyton30419822017-05-12 18:01:32 +00002578void __kmp_set_num_threads(int new_nth, int gtid) {
2579 kmp_info_t *thread;
2580 kmp_root_t *root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002581
Jonathan Peyton30419822017-05-12 18:01:32 +00002582 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2583 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002584
Jonathan Peyton30419822017-05-12 18:01:32 +00002585 if (new_nth < 1)
2586 new_nth = 1;
2587 else if (new_nth > __kmp_max_nth)
2588 new_nth = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002589
Jonathan Peyton30419822017-05-12 18:01:32 +00002590 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2591 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592
Jonathan Peyton30419822017-05-12 18:01:32 +00002593 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002594
Jonathan Peyton30419822017-05-12 18:01:32 +00002595 set__nproc(thread, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002596
Jonathan Peyton30419822017-05-12 18:01:32 +00002597 // If this omp_set_num_threads() call will cause the hot team size to be
2598 // reduced (in the absence of a num_threads clause), then reduce it now,
2599 // rather than waiting for the next parallel region.
2600 root = thread->th.th_root;
2601 if (__kmp_init_parallel && (!root->r.r_active) &&
2602 (root->r.r_hot_team->t.t_nproc > new_nth)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002603#if KMP_NESTED_HOT_TEAMS
2604 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2605#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002606 ) {
2607 kmp_team_t *hot_team = root->r.r_hot_team;
2608 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002609
Jonathan Peyton30419822017-05-12 18:01:32 +00002610 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002611
Jonathan Peyton30419822017-05-12 18:01:32 +00002612 // Release the extra threads we don't need any more.
2613 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2614 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2615 if (__kmp_tasking_mode != tskm_immediate_exec) {
2616 // When decreasing the team size, threads no longer in the team should
2617 // unreference the task team.
2618 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2619 }
2620 __kmp_free_thread(hot_team->t.t_threads[f]);
2621 hot_team->t.t_threads[f] = NULL;
2622 }
2623 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002624#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002625 if (thread->th.th_hot_teams) {
2626 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2627 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2628 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002629#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002630
Jonathan Peyton30419822017-05-12 18:01:32 +00002631 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002632
Jonathan Peyton30419822017-05-12 18:01:32 +00002633 // Update the t_nproc field in the threads that are still active.
2634 for (f = 0; f < new_nth; f++) {
2635 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2636 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002638 // Special flag marking an omp_set_num_threads() call
2639 hot_team->t.t_size_changed = -1;
2640 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002641}
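/* Usage sketch (assuming the standard omp_set_num_threads() entry point
   forwards here with the caller's gtid):

     omp_set_num_threads(2); // hot team trimmed now if it was larger
     #pragma omp parallel    // next region forks with the reduced size
     {}

   Trimming the hot team eagerly returns surplus workers to the thread pool
   instead of carrying them along to the next fork. */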
2642
Jim Cownie5e8470a2013-09-27 10:38:44 +00002643/* Changes max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002644void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2645 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002646
Jonathan Peyton30419822017-05-12 18:01:32 +00002647 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2648 "%d = (%d)\n",
2649 gtid, max_active_levels));
2650 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002651
Jonathan Peyton30419822017-05-12 18:01:32 +00002652 // validate max_active_levels
2653 if (max_active_levels < 0) {
2654 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2655 // We ignore this call if the user has specified a negative value.
2656 // The current setting won't be changed. The last valid setting will be
2657 // used. A warning will be issued (if warnings are allowed as controlled by
2658 // the KMP_WARNINGS env var).
2659 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2660 "max_active_levels for thread %d = (%d)\n",
2661 gtid, max_active_levels));
2662 return;
2663 }
2664 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2665 // it's OK, the max_active_levels is within the valid range: [ 0;
2666 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2667 // We allow a zero value. (implementation defined behavior)
2668 } else {
2669 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2670 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2671 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2672 // Current upper limit is MAX_INT. (implementation defined behavior)
2673 // If the input exceeds the upper limit, we correct the input to be the
2674 // upper limit. (implementation defined behavior)
2675 // In practice, the flow never reaches here while the limit is MAX_INT.
2676 }
2677 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2678 "max_active_levels for thread %d = (%d)\n",
2679 gtid, max_active_levels));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002680
Jonathan Peyton30419822017-05-12 18:01:32 +00002681 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002682
Jonathan Peyton30419822017-05-12 18:01:32 +00002683 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002684
Jonathan Peyton30419822017-05-12 18:01:32 +00002685 set__max_active_levels(thread, max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002686}
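/* Usage sketch (assuming omp_set_max_active_levels() forwards here):

     omp_set_max_active_levels(-1); // ignored with a warning; last value kept
     omp_set_max_active_levels(2);  // at most two nested active levels

   The value lands in the current task's ICVs, so it is inherited by the
   implicit tasks of subsequently forked regions. */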
2687
2688/* Gets max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002689int __kmp_get_max_active_levels(int gtid) {
2690 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002691
Jonathan Peyton30419822017-05-12 18:01:32 +00002692 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2693 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002694
Jonathan Peyton30419822017-05-12 18:01:32 +00002695 thread = __kmp_threads[gtid];
2696 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2697 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2698 "curtask_maxaclevel=%d\n",
2699 gtid, thread->th.th_current_task,
2700 thread->th.th_current_task->td_icvs.max_active_levels));
2701 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002702}
2703
2704/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
Jonathan Peyton30419822017-05-12 18:01:32 +00002705void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2706 kmp_info_t *thread;
2707 // kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002708
Jonathan Peyton30419822017-05-12 18:01:32 +00002709 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2710 gtid, (int)kind, chunk));
2711 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002712
Jonathan Peyton30419822017-05-12 18:01:32 +00002713 // Check if the kind parameter is valid, correct if needed.
2714 // Valid parameters should fit in one of two intervals - standard or extended:
2715 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2716 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2717 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2718 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2719 // TODO: Hint needs attention in case we change the default schedule.
2720 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2721 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2722 __kmp_msg_null);
2723 kind = kmp_sched_default;
2724 chunk = 0; // ignore chunk value in case of bad kind
2725 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002726
Jonathan Peyton30419822017-05-12 18:01:32 +00002727 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002728
Jonathan Peyton30419822017-05-12 18:01:32 +00002729 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002730
Jonathan Peyton30419822017-05-12 18:01:32 +00002731 if (kind < kmp_sched_upper_std) {
2732 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2733 // differentiate static chunked vs. unchunked: chunk should be invalid to
2734 // indicate an unchunked schedule (which is the default)
2735 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002736 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002737 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2738 __kmp_sch_map[kind - kmp_sched_lower - 1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002739 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002740 } else {
2741 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2742 // kmp_sched_lower - 2 ];
2743 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2744 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2745 kmp_sched_lower - 2];
2746 }
Andrey Churbanovd454c732017-06-05 17:17:33 +00002747 if (kind == kmp_sched_auto || chunk < 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002748 // ignore parameter chunk for schedule auto
2749 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2750 } else {
2751 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2752 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002753}
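/* Usage sketch (assuming the omp_set_schedule() wrapper maps omp_sched_t
   onto the kmp_sched_t intervals validated above):

     omp_set_schedule(omp_sched_dynamic, 16); // runtime schedule: dynamic,16
     omp_set_schedule(omp_sched_auto, 99);    // chunk ignored for auto
     omp_set_schedule((omp_sched_t)777, 4);   // out of range: warn, default
*/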
2754
2755/* Gets def_sched_var ICV values */
Jonathan Peyton30419822017-05-12 18:01:32 +00002756void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2757 kmp_info_t *thread;
2758 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002759
Jonathan Peyton30419822017-05-12 18:01:32 +00002760 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2761 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002762
Jonathan Peyton30419822017-05-12 18:01:32 +00002763 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002764
Jonathan Peyton30419822017-05-12 18:01:32 +00002765 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002766
Jonathan Peyton30419822017-05-12 18:01:32 +00002767 switch (th_type) {
2768 case kmp_sch_static:
2769 case kmp_sch_static_greedy:
2770 case kmp_sch_static_balanced:
2771 *kind = kmp_sched_static;
2772 *chunk = 0; // chunk was not set; signal that fact with a zero value
2773 return;
2774 case kmp_sch_static_chunked:
2775 *kind = kmp_sched_static;
2776 break;
2777 case kmp_sch_dynamic_chunked:
2778 *kind = kmp_sched_dynamic;
2779 break;
2780 case kmp_sch_guided_chunked:
2781 case kmp_sch_guided_iterative_chunked:
2782 case kmp_sch_guided_analytical_chunked:
2783 *kind = kmp_sched_guided;
2784 break;
2785 case kmp_sch_auto:
2786 *kind = kmp_sched_auto;
2787 break;
2788 case kmp_sch_trapezoidal:
2789 *kind = kmp_sched_trapezoidal;
2790 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002791#if KMP_STATIC_STEAL_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002792 case kmp_sch_static_steal:
2793 *kind = kmp_sched_static_steal;
2794 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002795#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002796 default:
2797 KMP_FATAL(UnknownSchedulingType, th_type);
2798 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002799
Jonathan Peyton30419822017-05-12 18:01:32 +00002800 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002801}
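/* Round-trip sketch: the detailed internal type is folded back to the coarse
   user-visible kind, so e.g. kmp_sch_static_greedy still reports as static:

     omp_sched_t kind; int chunk;
     omp_set_schedule(omp_sched_static, 0);
     omp_get_schedule(&kind, &chunk); // kind == omp_sched_static, chunk == 0
*/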
2802
Jonathan Peyton30419822017-05-12 18:01:32 +00002803int __kmp_get_ancestor_thread_num(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002804
Jonathan Peyton30419822017-05-12 18:01:32 +00002805 int ii, dd;
2806 kmp_team_t *team;
2807 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002808
Jonathan Peyton30419822017-05-12 18:01:32 +00002809 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2810 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002811
Jonathan Peyton30419822017-05-12 18:01:32 +00002812 // validate level
2813 if (level == 0)
2814 return 0;
2815 if (level < 0)
2816 return -1;
2817 thr = __kmp_threads[gtid];
2818 team = thr->th.th_team;
2819 ii = team->t.t_level;
2820 if (level > ii)
2821 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002822
2823#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002824 if (thr->th.th_teams_microtask) {
2825 // AC: we are in teams region where multiple nested teams have same level
2826 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2827 if (level <=
2828 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2829 KMP_DEBUG_ASSERT(ii >= tlevel);
2830 // AC: As we need to pass by the teams league, we need to artificially
2831 // increase ii
2832 if (ii == tlevel) {
2833 ii += 2; // three teams have same level
2834 } else {
2835 ii++; // two teams have same level
2836 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002837 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002838 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002839#endif
2840
Jonathan Peyton30419822017-05-12 18:01:32 +00002841 if (ii == level)
2842 return __kmp_tid_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002843
Jonathan Peyton30419822017-05-12 18:01:32 +00002844 dd = team->t.t_serialized;
2845 level++;
2846 while (ii > level) {
2847 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002848 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002849 if ((team->t.t_serialized) && (!dd)) {
2850 team = team->t.t_parent;
2851 continue;
2852 }
2853 if (ii > level) {
2854 team = team->t.t_parent;
2855 dd = team->t.t_serialized;
2856 ii--;
2857 }
2858 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002859
Jonathan Peyton30419822017-05-12 18:01:32 +00002860 return (dd > 1) ? (0) : (team->t.t_master_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002861}
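/* Level-numbering example (a sketch using the omp_get_ancestor_thread_num()
   wrapper; the values shown assume nesting is enabled and both regions fork):

     #pragma omp parallel num_threads(4) // level 1
     #pragma omp parallel num_threads(2) // level 2
     {
       omp_get_ancestor_thread_num(0); // 0: the initial thread
       omp_get_ancestor_thread_num(1); // tid within the outer team
       omp_get_ancestor_thread_num(2); // == omp_get_thread_num()
       omp_get_ancestor_thread_num(3); // -1: deeper than the current level
     }
*/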
2862
Jonathan Peyton30419822017-05-12 18:01:32 +00002863int __kmp_get_team_size(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002864
Jonathan Peyton30419822017-05-12 18:01:32 +00002865 int ii, dd;
2866 kmp_team_t *team;
2867 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002868
Jonathan Peyton30419822017-05-12 18:01:32 +00002869 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
2870 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002871
Jonathan Peyton30419822017-05-12 18:01:32 +00002872 // validate level
2873 if (level == 0)
2874 return 1;
2875 if (level < 0)
2876 return -1;
2877 thr = __kmp_threads[gtid];
2878 team = thr->th.th_team;
2879 ii = team->t.t_level;
2880 if (level > ii)
2881 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002882
2883#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002884 if (thr->th.th_teams_microtask) {
2885 // AC: we are in a teams region where multiple nested teams have the same level
2886 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2887 if (level <=
2888 tlevel) { // otherwise the usual algorithm works (it will not touch the teams)
2889 KMP_DEBUG_ASSERT(ii >= tlevel);
2890 // AC: As we need to pass by the teams league, we need to artificially
2891 // increase ii
2892 if (ii == tlevel) {
2893 ii += 2; // three teams have same level
2894 } else {
2895 ii++; // two teams have same level
2896 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002897 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002898 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002899#endif
2900
Jonathan Peyton30419822017-05-12 18:01:32 +00002901 while (ii > level) {
2902 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002903 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002904 if (team->t.t_serialized && (!dd)) {
2905 team = team->t.t_parent;
2906 continue;
2907 }
2908 if (ii > level) {
2909 team = team->t.t_parent;
2910 ii--;
2911 }
2912 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002913
Jonathan Peyton30419822017-05-12 18:01:32 +00002914 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002915}
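/* Companion sketch: with the nesting shown for the previous routine,
   omp_get_team_size(0) == 1 (the initial team), omp_get_team_size(1) == 4,
   omp_get_team_size(2) == 2, and any level outside [0, omp_get_level()]
   returns -1. */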
2916
Jonathan Peyton30419822017-05-12 18:01:32 +00002917kmp_r_sched_t __kmp_get_schedule_global() {
2918 // This routine created because pairs (__kmp_sched, __kmp_chunk) and
2919 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
2920 // independently. So one can get the updated schedule here.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002921
Jonathan Peyton30419822017-05-12 18:01:32 +00002922 kmp_r_sched_t r_sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002923
Jonathan Peyton30419822017-05-12 18:01:32 +00002924 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
2925 // __kmp_guided. __kmp_sched should keep its original value, so that the
2926 // user can set KMP_SCHEDULE multiple times and thus have different
2927 // run-time schedules in different roots (even in OMP 2.5)
2928 if (__kmp_sched == kmp_sch_static) {
2929 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed
2930 // schedule (balanced or greedy)
2931 } else if (__kmp_sched == kmp_sch_guided_chunked) {
2932 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed
2933 // schedule (iterative or analytical)
2934 } else {
2935 r_sched.r_sched_type =
2936 __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2937 }
2938
2939 if (__kmp_chunk < KMP_DEFAULT_CHUNK) { // __kmp_chunk may be wrong here (if it
2940 // was not ever set)
2941 r_sched.chunk = KMP_DEFAULT_CHUNK;
2942 } else {
2943 r_sched.chunk = __kmp_chunk;
2944 }
2945
2946 return r_sched;
2947}
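/* For example (a sketch of the intended mapping, not an exhaustive list):
   with OMP_SCHEDULE="guided,4" parsed by kmp_settings.cpp, __kmp_sched is
   kmp_sch_guided_chunked, so r_sched comes back as the detailed __kmp_guided
   variant with chunk 4; if no chunk was ever set, KMP_DEFAULT_CHUNK is
   substituted. */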
2948
2949/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2950 at least argc number of *t_argv entries for the requested team. */
2951static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
2952
2953 KMP_DEBUG_ASSERT(team);
2954 if (!realloc || argc > team->t.t_max_argc) {
2955
2956 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
2957 "current entries=%d\n",
2958 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
2959 /* if previously allocated heap space for args, free them */
2960 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
2961 __kmp_free((void *)team->t.t_argv);
2962
2963 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
2964 /* use unused space in the cache line for arguments */
2965 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
2966 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
2967 "argv entries\n",
2968 team->t.t_id, team->t.t_max_argc));
2969 team->t.t_argv = &team->t.t_inline_argv[0];
2970 if (__kmp_storage_map) {
2971 __kmp_print_storage_map_gtid(
2972 -1, &team->t.t_inline_argv[0],
2973 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2974 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
2975 team->t.t_id);
2976 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002977 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002978 /* allocate space for arguments in the heap */
2979 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
2980 ? KMP_MIN_MALLOC_ARGV_ENTRIES
2981 : 2 * argc;
2982 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
2983 "argv entries\n",
2984 team->t.t_id, team->t.t_max_argc));
2985 team->t.t_argv =
2986 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
2987 if (__kmp_storage_map) {
2988 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
2989 &team->t.t_argv[team->t.t_max_argc],
2990 sizeof(void *) * team->t.t_max_argc,
2991 "team_%d.t_argv", team->t.t_id);
2992 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002993 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002994 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002995}
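/* Sizing sketch: requests with argc <= KMP_INLINE_ARGV_ENTRIES reuse spare
   space in the team structure itself. For heap requests, assuming for
   illustration KMP_MIN_MALLOC_ARGV_ENTRIES == 100: argc == 12 allocates 100
   slots (12 <= 100/2), while argc == 60 allocates 2 * 60 == 120, so repeated
   growth stays amortized. */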
2996
Jonathan Peyton30419822017-05-12 18:01:32 +00002997static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
2998 int i;
2999 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3000 team->t.t_threads =
3001 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3002 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3003 sizeof(dispatch_shared_info_t) * num_disp_buff);
3004 team->t.t_dispatch =
3005 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3006 team->t.t_implicit_task_taskdata =
3007 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3008 team->t.t_max_nproc = max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003009
Jonathan Peyton30419822017-05-12 18:01:32 +00003010 /* setup dispatch buffers */
3011 for (i = 0; i < num_disp_buff; ++i) {
3012 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003013#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003014 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003015#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003016 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003017}
3018
Jonathan Peyton30419822017-05-12 18:01:32 +00003019static void __kmp_free_team_arrays(kmp_team_t *team) {
3020 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3021 int i;
3022 for (i = 0; i < team->t.t_max_nproc; ++i) {
3023 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3024 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3025 team->t.t_dispatch[i].th_disp_buffer = NULL;
3026 }; // if
3027 }; // for
3028 __kmp_free(team->t.t_threads);
3029 __kmp_free(team->t.t_disp_buffer);
3030 __kmp_free(team->t.t_dispatch);
3031 __kmp_free(team->t.t_implicit_task_taskdata);
3032 team->t.t_threads = NULL;
3033 team->t.t_disp_buffer = NULL;
3034 team->t.t_dispatch = NULL;
3035 team->t.t_implicit_task_taskdata = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003036}
3037
Jonathan Peyton30419822017-05-12 18:01:32 +00003038static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3039 kmp_info_t **oldThreads = team->t.t_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003040
Jonathan Peyton30419822017-05-12 18:01:32 +00003041 __kmp_free(team->t.t_disp_buffer);
3042 __kmp_free(team->t.t_dispatch);
3043 __kmp_free(team->t.t_implicit_task_taskdata);
3044 __kmp_allocate_team_arrays(team, max_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003045
Jonathan Peyton30419822017-05-12 18:01:32 +00003046 KMP_MEMCPY(team->t.t_threads, oldThreads,
3047 team->t.t_nproc * sizeof(kmp_info_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003048
Jonathan Peyton30419822017-05-12 18:01:32 +00003049 __kmp_free(oldThreads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003050}
3051
Jonathan Peyton30419822017-05-12 18:01:32 +00003052static kmp_internal_control_t __kmp_get_global_icvs(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003053
Jonathan Peyton30419822017-05-12 18:01:32 +00003054 kmp_r_sched_t r_sched =
3055 __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003056
3057#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003058 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003059#endif /* OMP_40_ENABLED */
3060
Jonathan Peyton30419822017-05-12 18:01:32 +00003061 kmp_internal_control_t g_icvs = {
3062 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3063 (kmp_int8)__kmp_dflt_nested, // int nested; //internal control
3064 // for nested parallelism (per thread)
3065 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3066 // adjustment of threads (per thread)
3067 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3068 // whether blocktime is explicitly set
3069 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003070#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00003071 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3072// intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003073#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003074 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3075 // next parallel region (per thread)
3076 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3077 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3078 // for max_active_levels
3079 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3080// {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003081#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003082 __kmp_nested_proc_bind.bind_types[0],
3083 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003084#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00003085 NULL // struct kmp_internal_control *next;
3086 };
Jim Cownie5e8470a2013-09-27 10:38:44 +00003087
Jonathan Peyton30419822017-05-12 18:01:32 +00003088 return g_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003089}
3090
Jonathan Peyton30419822017-05-12 18:01:32 +00003091static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003092
Jonathan Peyton30419822017-05-12 18:01:32 +00003093 kmp_internal_control_t gx_icvs;
3094 gx_icvs.serial_nesting_level =
3095 0; // probably =team->t.t_serialized, as in __kmp_save_internal_controls
3096 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3097 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003098
Jonathan Peyton30419822017-05-12 18:01:32 +00003099 return gx_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003100}
3101
Jonathan Peyton30419822017-05-12 18:01:32 +00003102static void __kmp_initialize_root(kmp_root_t *root) {
3103 int f;
3104 kmp_team_t *root_team;
3105 kmp_team_t *hot_team;
3106 int hot_team_max_nth;
3107 kmp_r_sched_t r_sched =
3108 __kmp_get_schedule_global(); // get current state of scheduling globals
3109 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3110 KMP_DEBUG_ASSERT(root);
3111 KMP_ASSERT(!root->r.r_begin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003112
Jonathan Peyton30419822017-05-12 18:01:32 +00003113 /* setup the root state structure */
3114 __kmp_init_lock(&root->r.r_begin_lock);
3115 root->r.r_begin = FALSE;
3116 root->r.r_active = FALSE;
3117 root->r.r_in_parallel = 0;
3118 root->r.r_blocktime = __kmp_dflt_blocktime;
3119 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003120
Jonathan Peyton30419822017-05-12 18:01:32 +00003121 /* setup the root team for this task */
3122 /* allocate the root team structure */
3123 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003124
Jonathan Peyton30419822017-05-12 18:01:32 +00003125 root_team =
3126 __kmp_allocate_team(root,
3127 1, // new_nproc
3128 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003129#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003130 0, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003131#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003132#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003133 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003134#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003135 &r_icvs,
3136 0 // argc
3137 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3138 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003139#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00003140 // Non-NULL value should be assigned to make the debugger display the root
3141 // team.
3142 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003143#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003144
Jonathan Peyton30419822017-05-12 18:01:32 +00003145 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003146
Jonathan Peyton30419822017-05-12 18:01:32 +00003147 root->r.r_root_team = root_team;
3148 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003149
Jonathan Peyton30419822017-05-12 18:01:32 +00003150 /* initialize root team */
3151 root_team->t.t_threads[0] = NULL;
3152 root_team->t.t_nproc = 1;
3153 root_team->t.t_serialized = 1;
3154 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3155 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3156 root_team->t.t_sched.chunk = r_sched.chunk;
3157 KA_TRACE(
3158 20,
3159 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3160 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003161
Jonathan Peyton30419822017-05-12 18:01:32 +00003162 /* setup the hot team for this task */
3163 /* allocate the hot team structure */
3164 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003165
Jonathan Peyton30419822017-05-12 18:01:32 +00003166 hot_team =
3167 __kmp_allocate_team(root,
3168 1, // new_nproc
3169 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003170#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003171 0, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003172#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003173#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003174 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003175#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003176 &r_icvs,
3177 0 // argc
3178 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3179 );
3180 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003181
Jonathan Peyton30419822017-05-12 18:01:32 +00003182 root->r.r_hot_team = hot_team;
3183 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003184
Jonathan Peyton30419822017-05-12 18:01:32 +00003185 /* first-time initialization */
3186 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003187
Jonathan Peyton30419822017-05-12 18:01:32 +00003188 /* initialize hot team */
3189 hot_team_max_nth = hot_team->t.t_max_nproc;
3190 for (f = 0; f < hot_team_max_nth; ++f) {
3191 hot_team->t.t_threads[f] = NULL;
3192 }; // for
3193 hot_team->t.t_nproc = 1;
3194 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3195 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3196 hot_team->t.t_sched.chunk = r_sched.chunk;
3197 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003198}
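/* Resulting shape (informal): each root, i.e. each "uber" thread, owns a
   serialized root team of size one plus a hot team that is recycled by the
   outermost parallel region; hot_team->t.t_parent == root_team as set above. */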
3199
3200#ifdef KMP_DEBUG
3201
Jim Cownie5e8470a2013-09-27 10:38:44 +00003202typedef struct kmp_team_list_item {
Jonathan Peyton30419822017-05-12 18:01:32 +00003203 kmp_team_p const *entry;
3204 struct kmp_team_list_item *next;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003205} kmp_team_list_item_t;
Jonathan Peyton30419822017-05-12 18:01:32 +00003206typedef kmp_team_list_item_t *kmp_team_list_t;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003207
Jonathan Peyton30419822017-05-12 18:01:32 +00003208static void __kmp_print_structure_team_accum( // Add team to list of teams.
3209 kmp_team_list_t list, // List of teams.
3210 kmp_team_p const *team // Team to add.
3211 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003212
Jonathan Peyton30419822017-05-12 18:01:32 +00003213 // List must terminate with item where both entry and next are NULL.
3214 // Team is added to the list only once.
3215 // List is sorted in ascending order by team id.
3216 // Team id is *not* a key.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003217
Jonathan Peyton30419822017-05-12 18:01:32 +00003218 kmp_team_list_t l;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003219
Jonathan Peyton30419822017-05-12 18:01:32 +00003220 KMP_DEBUG_ASSERT(list != NULL);
3221 if (team == NULL) {
3222 return;
3223 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003224
Jonathan Peyton30419822017-05-12 18:01:32 +00003225 __kmp_print_structure_team_accum(list, team->t.t_parent);
3226 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003227
Jonathan Peyton30419822017-05-12 18:01:32 +00003228 // Search list for the team.
3229 l = list;
3230 while (l->next != NULL && l->entry != team) {
3231 l = l->next;
3232 }; // while
3233 if (l->next != NULL) {
3234 return; // Team has been added before, exit.
3235 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003236
Jonathan Peyton30419822017-05-12 18:01:32 +00003237 // Team is not found. Search list again for insertion point.
3238 l = list;
3239 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3240 l = l->next;
3241 }; // while
Jim Cownie5e8470a2013-09-27 10:38:44 +00003242
Jonathan Peyton30419822017-05-12 18:01:32 +00003243 // Insert team.
3244 {
3245 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3246 sizeof(kmp_team_list_item_t));
3247 *item = *l;
3248 l->entry = team;
3249 l->next = item;
3250 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003251}
3252
Jonathan Peyton30419822017-05-12 18:01:32 +00003253static void __kmp_print_structure_team(char const *title,
3254 kmp_team_p const *team) {
3256 __kmp_printf("%s", title);
3257 if (team != NULL) {
3258 __kmp_printf("%2x %p\n", team->t.t_id, team);
3259 } else {
3260 __kmp_printf(" - (nil)\n");
3261 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003262}
3263
Jonathan Peyton30419822017-05-12 18:01:32 +00003264static void __kmp_print_structure_thread(char const *title,
3265 kmp_info_p const *thread) {
3266 __kmp_printf("%s", title);
3267 if (thread != NULL) {
3268 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3269 } else {
3270 __kmp_printf(" - (nil)\n");
3271 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003272}
3273
Jonathan Peyton30419822017-05-12 18:01:32 +00003274void __kmp_print_structure(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003275
Jonathan Peyton30419822017-05-12 18:01:32 +00003276 kmp_team_list_t list;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003277
Jonathan Peyton30419822017-05-12 18:01:32 +00003278 // Initialize list of teams.
3279 list =
3280 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3281 list->entry = NULL;
3282 list->next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003283
Jonathan Peyton30419822017-05-12 18:01:32 +00003284 __kmp_printf("\n------------------------------\nGlobal Thread "
3285 "Table\n------------------------------\n");
3286 {
3287 int gtid;
3288 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3289 __kmp_printf("%2d", gtid);
3290 if (__kmp_threads != NULL) {
3291 __kmp_printf(" %p", __kmp_threads[gtid]);
3292 }; // if
3293 if (__kmp_root != NULL) {
3294 __kmp_printf(" %p", __kmp_root[gtid]);
3295 }; // if
3296 __kmp_printf("\n");
3297 }; // for gtid
3298 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003299
Jonathan Peyton30419822017-05-12 18:01:32 +00003300 // Print out __kmp_threads array.
3301 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3302 "----------\n");
3303 if (__kmp_threads != NULL) {
3304 int gtid;
3305 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3306 kmp_info_t const *thread = __kmp_threads[gtid];
3307 if (thread != NULL) {
3308 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3309 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3310 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3311 __kmp_print_structure_team(" Serial Team: ",
3312 thread->th.th_serial_team);
3313 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3314 __kmp_print_structure_thread(" Master: ",
3315 thread->th.th_team_master);
3316 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3317 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003318#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003319 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003320#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003321 __kmp_print_structure_thread(" Next in pool: ",
3322 thread->th.th_next_pool);
3323 __kmp_printf("\n");
3324 __kmp_print_structure_team_accum(list, thread->th.th_team);
3325 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3326 }; // if
3327 }; // for gtid
3328 } else {
3329 __kmp_printf("Threads array is not allocated.\n");
3330 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003331
Jonathan Peyton30419822017-05-12 18:01:32 +00003332 // Print out __kmp_root array.
3333 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3334 "--------\n");
3335 if (__kmp_root != NULL) {
3336 int gtid;
3337 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3338 kmp_root_t const *root = __kmp_root[gtid];
3339 if (root != NULL) {
3340 __kmp_printf("GTID %2d %p:\n", gtid, root);
3341 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3342 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3343 __kmp_print_structure_thread(" Uber Thread: ",
3344 root->r.r_uber_thread);
3345 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3346 __kmp_printf(" Nested?: %2d\n", root->r.r_nested);
3347 __kmp_printf(" In Parallel: %2d\n", root->r.r_in_parallel);
3348 __kmp_printf("\n");
3349 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3350 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3351 }; // if
3352 }; // for gtid
3353 } else {
3354 __kmp_printf("Ubers array is not allocated.\n");
3355 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003356
Jonathan Peyton30419822017-05-12 18:01:32 +00003357 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3358 "--------\n");
3359 while (list->next != NULL) {
3360 kmp_team_p const *team = list->entry;
3361 int i;
3362 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3363 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3364 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
3365 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3366 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3367 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3368 for (i = 0; i < team->t.t_nproc; ++i) {
3369 __kmp_printf(" Thread %2d: ", i);
3370 __kmp_print_structure_thread("", team->t.t_threads[i]);
3371 }; // for i
3372 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3373 __kmp_printf("\n");
3374 list = list->next;
3375 }; // while
Jim Cownie5e8470a2013-09-27 10:38:44 +00003376
Jonathan Peyton30419822017-05-12 18:01:32 +00003377 // Print out __kmp_thread_pool and __kmp_team_pool.
3378 __kmp_printf("\n------------------------------\nPools\n----------------------"
3379 "--------\n");
3380 __kmp_print_structure_thread("Thread pool: ",
3381 (kmp_info_t *)__kmp_thread_pool);
3382 __kmp_print_structure_team("Team pool: ",
3383 (kmp_team_t *)__kmp_team_pool);
3384 __kmp_printf("\n");
Jim Cownie5e8470a2013-09-27 10:38:44 +00003385
Jonathan Peyton30419822017-05-12 18:01:32 +00003386 // Free team list.
3387 while (list != NULL) {
3388 kmp_team_list_item_t *item = list;
3389 list = list->next;
3390 KMP_INTERNAL_FREE(item);
3391 }; // while
Jim Cownie5e8470a2013-09-27 10:38:44 +00003392}
3393
3394#endif
3395
Jim Cownie5e8470a2013-09-27 10:38:44 +00003396//---------------------------------------------------------------------------
3397// Stuff for per-thread fast random number generator
3398// Table of primes
Jim Cownie5e8470a2013-09-27 10:38:44 +00003399static const unsigned __kmp_primes[] = {
Jonathan Peyton30419822017-05-12 18:01:32 +00003400 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3401 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3402 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3403 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3404 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3405 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3406 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3407 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3408 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3409 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3410 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
Jim Cownie5e8470a2013-09-27 10:38:44 +00003411
3412//---------------------------------------------------------------------------
3413// __kmp_get_random: Get a random number using a linear congruential method.
Jonathan Peyton30419822017-05-12 18:01:32 +00003414unsigned short __kmp_get_random(kmp_info_t *thread) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003415 unsigned x = thread->th.th_x;
Jonathan Peyton30419822017-05-12 18:01:32 +00003416 unsigned short r = x >> 16;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003417
Jonathan Peyton30419822017-05-12 18:01:32 +00003418 thread->th.th_x = x * thread->th.th_a + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003419
3420 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00003421 thread->th.th_info.ds.ds_tid, r));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003422
3423 return r;
3424}
3425//--------------------------------------------------------
3426// __kmp_init_random: Initialize a random number generator
Jonathan Peyton30419822017-05-12 18:01:32 +00003427void __kmp_init_random(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003428 unsigned seed = thread->th.th_info.ds.ds_tid;
3429
Jonathan Peyton30419822017-05-12 18:01:32 +00003430 thread->th.th_a =
3431 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3432 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3433 KA_TRACE(30,
3434 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003435}
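/* The two routines above implement a per-thread linear congruential
   generator, x_{n+1} = a * x_n + 1 (mod 2^32), with the multiplier a drawn
   from __kmp_primes by thread id and the high 16 bits of x returned as the
   sample. Standalone sketch of the same recurrence (illustrative only):

     unsigned x = 42, a = 0x9e3779b1; // any entry of the table works as 'a'
     for (int i = 0; i < 3; ++i) {
       unsigned short r = (unsigned short)(x >> 16); // the returned sample
       printf("%hu\n", r);
       x = x * a + 1; // unsigned overflow gives the mod-2^32 wrap
     }
*/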
3436
Jim Cownie5e8470a2013-09-27 10:38:44 +00003437#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00003438/* reclaim array entries for root threads that are already dead, returns number
3439 * reclaimed */
3440static int __kmp_reclaim_dead_roots(void) {
3441 int i, r = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003442
Jonathan Peyton30419822017-05-12 18:01:32 +00003443 for (i = 0; i < __kmp_threads_capacity; ++i) {
3444 if (KMP_UBER_GTID(i) &&
3445 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3446 !__kmp_root[i]
3447 ->r.r_active) { // AC: reclaim only roots died in non-active state
3448 r += __kmp_unregister_root_other_thread(i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003449 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003450 }
3451 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003452}
3453#endif
3454
Jonathan Peyton30419822017-05-12 18:01:32 +00003455/* This function attempts to create free entries in __kmp_threads and
3456 __kmp_root, and returns the number of free entries generated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003457
Jonathan Peyton30419822017-05-12 18:01:32 +00003458 For Windows* OS static library, the first mechanism used is to reclaim array
3459 entries for root threads that are already dead.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003460
Jonathan Peyton30419822017-05-12 18:01:32 +00003461 On all platforms, expansion is attempted on the arrays __kmp_threads and
3462 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3463 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3464 threadprivate cache array has been created. Synchronization with
3465 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003466
Jonathan Peyton30419822017-05-12 18:01:32 +00003467 After any dead root reclamation, if the clipping value allows array expansion
3468 to result in the generation of a total of nWish free slots, the function does
3469 that expansion. If not, but the clipping value allows array expansion to
3470 result in the generation of a total of nNeed free slots, the function does
3471 that expansion. Otherwise, nothing is done beyond the possible initial root
3472 thread reclamation. However, if nNeed is zero, a best-effort attempt is made
3473 to fulfil nWish as far as possible, i.e. the function will attempt to create
Jim Cownie5e8470a2013-09-27 10:38:44 +00003474 as many free slots as possible up to nWish.
3475
Jonathan Peyton30419822017-05-12 18:01:32 +00003476 If any argument is negative, the behavior is undefined. */
3477static int __kmp_expand_threads(int nWish, int nNeed) {
3478 int added = 0;
3479 int old_tp_cached;
3480 int __kmp_actual_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003481
Jonathan Peyton30419822017-05-12 18:01:32 +00003482 if (nNeed > nWish) /* normalize the arguments */
3483 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003484#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00003485 /* only for Windows static library */
3486 /* reclaim array entries for root threads that are already dead */
3487 added = __kmp_reclaim_dead_roots();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003488
Jonathan Peyton30419822017-05-12 18:01:32 +00003489 if (nNeed) {
3490 nNeed -= added;
3491 if (nNeed < 0)
3492 nNeed = 0;
3493 }
3494 if (nWish) {
3495 nWish -= added;
3496 if (nWish < 0)
3497 nWish = 0;
3498 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003499#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003500 if (nWish <= 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003501 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003502
3503 while (1) {
3504 int nTarget;
3505 int minimumRequiredCapacity;
3506 int newCapacity;
3507 kmp_info_t **newThreads;
3508 kmp_root_t **newRoot;
3509
3510 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3511 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3512 // user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may become
3513 // > __kmp_max_nth in one of two ways:
3514 //
3515 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3516 // may not be reused by another thread, so we may need to increase
3517 // __kmp_threads_capacity to __kmp_max_nth + 1.
3518 //
3519 // 2) New foreign root(s) are encountered. We always register new foreign
3520 // roots. This may cause a smaller # of threads to be allocated at
3521 // subsequent parallel regions, but the worker threads hang around (and
3522 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3523 //
3524 // Anyway, that is the reason for moving the check to see if
3525 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3526 // instead of having it performed here. -BB
3527 old_tp_cached = __kmp_tp_cached;
3528 __kmp_actual_max_nth =
3529 old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3530 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3531
3532 /* compute expansion headroom to check if we can expand and whether to aim
3533 for nWish or nNeed */
3534 nTarget = nWish;
3535 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3536 /* can't fulfil nWish, so try nNeed */
3537 if (nNeed) {
3538 nTarget = nNeed;
3539 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3540 /* possible expansion too small -- give up */
3541 break;
3542 }
3543 } else {
3544 /* best-effort */
3545 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3546 if (!nTarget) {
3547 /* can't expand at all -- give up */
3548 break;
3549 }
3550 }
3551 }
3552 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3553
3554 newCapacity = __kmp_threads_capacity;
3555 do {
3556 newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1)
3557 ? (newCapacity << 1)
3558 : __kmp_actual_max_nth;
3559 } while (newCapacity < minimumRequiredCapacity);
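    // Worked example (illustrative numbers): with __kmp_threads_capacity ==
    // 64, __kmp_actual_max_nth == 1000 and nTarget == 200, we get
    // minimumRequiredCapacity == 264 and newCapacity doubles 64 -> 128 ->
    // 256 -> 512, stopping once it reaches >= 264; if a doubling would pass
    // __kmp_actual_max_nth, newCapacity is clipped to that bound instead.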
3560 newThreads = (kmp_info_t **)__kmp_allocate(
3561 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity +
3562 CACHE_LINE);
3563 newRoot = (kmp_root_t **)((char *)newThreads +
3564 sizeof(kmp_info_t *) * newCapacity);
3565 KMP_MEMCPY(newThreads, __kmp_threads,
3566 __kmp_threads_capacity * sizeof(kmp_info_t *));
3567 KMP_MEMCPY(newRoot, __kmp_root,
3568 __kmp_threads_capacity * sizeof(kmp_root_t *));
3569 memset(newThreads + __kmp_threads_capacity, 0,
3570 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t *));
3571 memset(newRoot + __kmp_threads_capacity, 0,
3572 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t *));
3573
3574 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3575 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has
3576 allocated a threadprivate cache while we were allocating the expanded
3577 array, and our new capacity is larger than the threadprivate cache
3578 capacity, so we should deallocate the expanded arrays and try again.
3579 This is the first check of a double-check pair. */
3580 __kmp_free(newThreads);
3581 continue; /* start over and try again */
3582 }
3583 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3584 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3585 /* Same check as above, but this time with the lock so we can be sure if
3586 we can succeed. */
3587 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3588 __kmp_free(newThreads);
3589 continue; /* start over and try again */
3590 } else {
3591 /* success */
3592 // __kmp_free( __kmp_threads ); // ATT: It leads to a crash. Needs to be
3593 // investigated.
3594 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3595 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3596 added += newCapacity - __kmp_threads_capacity;
3597 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3598 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3599 break; /* succeeded, so we can exit the loop */
3600 }
3601 }
3602 return added;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003603}
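// A hedged usage sketch (hypothetical values): nNeed is the hard requirement
// and nWish the opportunistic goal, so a caller that must obtain one slot but
// would gladly take a few more could write:
//
//   if (__kmp_all_nth >= __kmp_threads_capacity &&
//       !__kmp_expand_threads(/* nWish = */ 4, /* nNeed = */ 1)) {
//     // not even one slot could be created -- report resource exhaustion
//   }
//
// __kmp_register_root() below uses the minimal form __kmp_expand_threads(1, 1).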
3604
Jonathan Peyton30419822017-05-12 18:01:32 +00003605/* Register the current thread as a root thread and obtain our gtid. We must
3606 have the __kmp_initz_lock held at this point. Argument TRUE only if we are
3607 the thread that calls from __kmp_do_serial_initialize() */
3608int __kmp_register_root(int initial_thread) {
3609 kmp_info_t *root_thread;
3610 kmp_root_t *root;
3611 int gtid;
3612 int capacity;
3613 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3614 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3615 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003616
Jonathan Peyton30419822017-05-12 18:01:32 +00003617 /* 2007-03-02:
3618 If the initial thread has not invoked the OpenMP RTL yet, and this thread
3619 is not an initial one, the "__kmp_all_nth >= __kmp_threads_capacity" check
3620 does not work as expected -- it may return false (meaning there is at least
3621 one empty slot in the __kmp_threads array), while the only free slot may in
3622 fact be #0, which is reserved for the initial thread and so cannot be used
3623 for this one. The following code works around this bug.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003624
Jonathan Peyton30419822017-05-12 18:01:32 +00003625 However, the right solution seems to be not reserving slot #0 for the
3626 initial thread, because:
3627 (1) there is no magic in slot #0, and
3628 (2) we cannot detect the initial thread reliably (the first thread that
3629 performs serial initialization may not be the real initial thread).
3630 */
3631 capacity = __kmp_threads_capacity;
3632 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3633 --capacity;
3634 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003635
Jonathan Peyton30419822017-05-12 18:01:32 +00003636 /* see if there are too many threads */
3637 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1, 1)) {
3638 if (__kmp_tp_cached) {
3639 __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread),
3640 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3641 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3642 } else {
3643 __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread),
3644 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003645 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003646 }; // if
3647
3648 /* find an available thread slot */
3649 /* Don't reassign the zero slot since we need that to only be used by initial
3650 thread */
3651 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3652 gtid++)
3653 ;
3654 KA_TRACE(1,
3655 ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3656 KMP_ASSERT(gtid < __kmp_threads_capacity);
3657
3658 /* update global accounting */
3659 __kmp_all_nth++;
3660 TCW_4(__kmp_nth, __kmp_nth + 1);
3661
3662 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3663 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3664 if (__kmp_adjust_gtid_mode) {
3665 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3666 if (TCR_4(__kmp_gtid_mode) != 2) {
3667 TCW_4(__kmp_gtid_mode, 2);
3668 }
3669 } else {
3670 if (TCR_4(__kmp_gtid_mode) != 1) {
3671 TCW_4(__kmp_gtid_mode, 1);
3672 }
3673 }
3674 }
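  // Illustrative example (hypothetical threshold): if __kmp_tls_gtid_min were
  // 20, registering the 20th live thread would flip __kmp_gtid_mode from 1
  // (stack-address search) to 2 (keyed TLS lookup); the keyed call has a
  // higher fixed cost per lookup but scales better than scanning many stacks.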
Jim Cownie5e8470a2013-09-27 10:38:44 +00003675
3676#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00003677 /* Adjust blocktime to zero if necessary */
3678 /* Middle initialization might not have occurred yet */
3679 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3680 if (__kmp_nth > __kmp_avail_proc) {
3681 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003682 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003683 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003684#endif /* KMP_ADJUST_BLOCKTIME */
3685
Jonathan Peyton30419822017-05-12 18:01:32 +00003686 /* setup this new hierarchy */
3687 if (!(root = __kmp_root[gtid])) {
3688 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3689 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3690 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003691
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003692#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003693 // Initialize stats as soon as possible (right after gtid assignment).
3694 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3695 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3696 KMP_SET_THREAD_STATE(SERIAL_REGION);
3697 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003698#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003699 __kmp_initialize_root(root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003700
Jonathan Peyton30419822017-05-12 18:01:32 +00003701 /* setup new root thread structure */
3702 if (root->r.r_uber_thread) {
3703 root_thread = root->r.r_uber_thread;
3704 } else {
3705 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3706 if (__kmp_storage_map) {
3707 __kmp_print_thread_storage_map(root_thread, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003708 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003709 root_thread->th.th_info.ds.ds_gtid = gtid;
3710 root_thread->th.th_root = root;
3711 if (__kmp_env_consistency_check) {
3712 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3713 }
3714#if USE_FAST_MEMORY
3715 __kmp_initialize_fast_memory(root_thread);
3716#endif /* USE_FAST_MEMORY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003717
Jonathan Peyton30419822017-05-12 18:01:32 +00003718#if KMP_USE_BGET
3719 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3720 __kmp_initialize_bget(root_thread);
3721#endif
3722 __kmp_init_random(root_thread); // Initialize random number generator
3723 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003724
Jonathan Peyton30419822017-05-12 18:01:32 +00003725 /* setup the serial team held in reserve by the root thread */
3726 if (!root_thread->th.th_serial_team) {
3727 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3728 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3729 root_thread->th.th_serial_team =
3730 __kmp_allocate_team(root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003731#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003732 0, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003733#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003734#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003735 proc_bind_default,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003736#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003737 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3738 }
3739 KMP_ASSERT(root_thread->th.th_serial_team);
3740 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3741 root_thread->th.th_serial_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003742
Jonathan Peyton30419822017-05-12 18:01:32 +00003743 /* drop root_thread into place */
3744 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003745
Jonathan Peyton30419822017-05-12 18:01:32 +00003746 root->r.r_root_team->t.t_threads[0] = root_thread;
3747 root->r.r_hot_team->t.t_threads[0] = root_thread;
3748 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3749 // AC: the team created in reserve, not for execution (it is unused for now).
3750 root_thread->th.th_serial_team->t.t_serialized = 0;
3751 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003752
Jonathan Peyton30419822017-05-12 18:01:32 +00003753 /* initialize the thread, get it ready to go */
3754 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3755 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003756
Jonathan Peyton30419822017-05-12 18:01:32 +00003757 /* prepare the master thread for get_gtid() */
3758 __kmp_gtid_set_specific(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003759
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003760#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003761 __kmp_itt_thread_name(gtid);
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003762#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003763
Jonathan Peyton30419822017-05-12 18:01:32 +00003764#ifdef KMP_TDATA_GTID
3765 __kmp_gtid = gtid;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003766#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003767 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3768 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3769
3770 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3771 "plain=%u\n",
3772 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3773 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3774 KMP_INIT_BARRIER_STATE));
3775 { // Initialize barrier data.
3776 int b;
3777 for (b = 0; b < bs_last_barrier; ++b) {
3778 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3779#if USE_DEBUGGER
3780 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3781#endif
3782 }; // for
3783 }
3784 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3785 KMP_INIT_BARRIER_STATE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003786
Alp Toker763b9392014-02-28 09:42:41 +00003787#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00003788#if OMP_40_ENABLED
3789 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3790 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3791 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3792 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3793#endif
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003794
Jonathan Peyton30419822017-05-12 18:01:32 +00003795 if (TCR_4(__kmp_init_middle)) {
3796 __kmp_affinity_set_init_mask(gtid, TRUE);
3797 }
Alp Toker763b9392014-02-28 09:42:41 +00003798#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003799
Jonathan Peyton30419822017-05-12 18:01:32 +00003800 __kmp_root_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003801
Jonathan Peyton30419822017-05-12 18:01:32 +00003802 KMP_MB();
3803 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003804
Jonathan Peyton30419822017-05-12 18:01:32 +00003805 return gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003806}
3807
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003808#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003809static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3810 const int max_level) {
3811 int i, n, nth;
3812 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3813 if (!hot_teams || !hot_teams[level].hot_team) {
3814 return 0;
3815 }
3816 KMP_DEBUG_ASSERT(level < max_level);
3817 kmp_team_t *team = hot_teams[level].hot_team;
3818 nth = hot_teams[level].hot_team_nth;
3819 n = nth - 1; // master is not freed
3820 if (level < max_level - 1) {
3821 for (i = 0; i < nth; ++i) {
3822 kmp_info_t *th = team->t.t_threads[i];
3823 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3824 if (i > 0 && th->th.th_hot_teams) {
3825 __kmp_free(th->th.th_hot_teams);
3826 th->th.th_hot_teams = NULL;
3827 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003828 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003829 }
3830 __kmp_free_team(root, team, NULL);
3831 return n;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003832}
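// Worked example (illustrative sizes): with __kmp_hot_teams_max_level == 2, a
// top-level hot team of 4 threads, and a nested hot team of 2 threads under
// each of them, a call at level 1 returns 2 - 1 == 1 (the nested master is
// not counted), so __kmp_reset_root below accumulates 4 + 4 * 1 == 8 freed
// __kmp_threads entries.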
3833#endif
3834
Jonathan Peyton30419822017-05-12 18:01:32 +00003835// Resets a root thread and clears its root and hot teams.
3836// Returns the number of __kmp_threads entries directly and indirectly freed.
3837static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3838 kmp_team_t *root_team = root->r.r_root_team;
3839 kmp_team_t *hot_team = root->r.r_hot_team;
3840 int n = hot_team->t.t_nproc;
3841 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003842
Jonathan Peyton30419822017-05-12 18:01:32 +00003843 KMP_DEBUG_ASSERT(!root->r.r_active);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003844
Jonathan Peyton30419822017-05-12 18:01:32 +00003845 root->r.r_root_team = NULL;
3846 root->r.r_hot_team = NULL;
3847 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
3848 // before call to __kmp_free_team().
3849 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003850#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003851 if (__kmp_hot_teams_max_level >
3852 0) { // need to free nested hot teams and their threads if any
3853 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3854 kmp_info_t *th = hot_team->t.t_threads[i];
3855 if (__kmp_hot_teams_max_level > 1) {
3856 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3857 }
3858 if (th->th.th_hot_teams) {
3859 __kmp_free(th->th.th_hot_teams);
3860 th->th.th_hot_teams = NULL;
3861 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003862 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003863 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003864#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003865 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003866
Jonathan Peyton30419822017-05-12 18:01:32 +00003867 // Before we can reap the thread, we need to make certain that all other
3868 // threads in the teams that had this root as ancestor have stopped trying to
3869 // steal tasks.
3870 if (__kmp_tasking_mode != tskm_immediate_exec) {
3871 __kmp_wait_to_unref_task_teams();
3872 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003873
Jonathan Peyton30419822017-05-12 18:01:32 +00003874#if KMP_OS_WINDOWS
3875 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3876 KA_TRACE(
3877 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3878 "\n",
3879 (LPVOID) & (root->r.r_uber_thread->th),
3880 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3881 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3882#endif /* KMP_OS_WINDOWS */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003883
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003884#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003885 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3886 int gtid = __kmp_get_gtid();
3887 __ompt_thread_end(ompt_thread_initial, gtid);
3888 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003889#endif
3890
Jonathan Peyton30419822017-05-12 18:01:32 +00003891 TCW_4(__kmp_nth,
3892 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3893 __kmp_reap_thread(root->r.r_uber_thread, 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003894
Jonathan Peyton30419822017-05-12 18:01:32 +00003895 // We cannot put the root thread into __kmp_thread_pool, so we have to reap
3896 // it instead of freeing it.
3897 root->r.r_uber_thread = NULL;
3898 /* mark root as no longer in use */
3899 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003900
Jonathan Peyton30419822017-05-12 18:01:32 +00003901 return n;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003902}
3903
Jonathan Peyton30419822017-05-12 18:01:32 +00003904void __kmp_unregister_root_current_thread(int gtid) {
3905 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3906 /* this lock should be ok, since unregister_root_current_thread is never
3907 called during an abort, only during a normal close. furthermore, if you
3908 have the forkjoin lock, you should never try to get the initz lock */
3909 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3910 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3911 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
3912 "exiting T#%d\n",
3913 gtid));
3914 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3915 return;
3916 }
3917 kmp_root_t *root = __kmp_root[gtid];
Jim Cownie77c2a632014-09-03 11:34:33 +00003918
Jonathan Peyton30419822017-05-12 18:01:32 +00003919 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3920 KMP_ASSERT(KMP_UBER_GTID(gtid));
3921 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3922 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003923
Jonathan Peyton30419822017-05-12 18:01:32 +00003924 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003925
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003926#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003927 kmp_info_t *thread = __kmp_threads[gtid];
3928 kmp_team_t *team = thread->th.th_team;
3929 kmp_task_team_t *task_team = thread->th.th_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003930
Jonathan Peyton30419822017-05-12 18:01:32 +00003931 // we need to wait for the proxy tasks before finishing the thread
3932 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003933#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003934 // the runtime is shutting down so we won't report any events
3935 thread->th.ompt_thread_info.state = ompt_state_undefined;
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003936#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003937 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3938 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003939#endif
3940
Jonathan Peyton30419822017-05-12 18:01:32 +00003941 __kmp_reset_root(gtid, root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003942
Jonathan Peyton30419822017-05-12 18:01:32 +00003943 /* free up this thread slot */
3944 __kmp_gtid_set_specific(KMP_GTID_DNE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003945#ifdef KMP_TDATA_GTID
Jonathan Peyton30419822017-05-12 18:01:32 +00003946 __kmp_gtid = KMP_GTID_DNE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003947#endif
3948
Jonathan Peyton30419822017-05-12 18:01:32 +00003949 KMP_MB();
3950 KC_TRACE(10,
3951 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003952
Jonathan Peyton30419822017-05-12 18:01:32 +00003953 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003954}
3955
Jonathan Peyton2321d572015-06-08 19:25:25 +00003956#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003957/* __kmp_forkjoin_lock must be already held
Jonathan Peyton30419822017-05-12 18:01:32 +00003958 Unregisters a root thread that is not the current thread. Returns the number
3959 of __kmp_threads entries freed as a result. */
3960static int __kmp_unregister_root_other_thread(int gtid) {
3961 kmp_root_t *root = __kmp_root[gtid];
3962 int r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003963
Jonathan Peyton30419822017-05-12 18:01:32 +00003964 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
3965 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3966 KMP_ASSERT(KMP_UBER_GTID(gtid));
3967 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3968 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003969
Jonathan Peyton30419822017-05-12 18:01:32 +00003970 r = __kmp_reset_root(gtid, root);
3971 KC_TRACE(10,
3972 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
3973 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003974}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003975#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003976
Jim Cownie5e8470a2013-09-27 10:38:44 +00003977#if KMP_DEBUG
3978void __kmp_task_info() {
3979
Jonathan Peyton30419822017-05-12 18:01:32 +00003980 kmp_int32 gtid = __kmp_entry_gtid();
3981 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
3982 kmp_info_t *this_thr = __kmp_threads[gtid];
3983 kmp_team_t *steam = this_thr->th.th_serial_team;
3984 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003985
Jonathan Peyton30419822017-05-12 18:01:32 +00003986 __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
3987 "ptask=%p\n",
3988 gtid, tid, this_thr, team, this_thr->th.th_current_task,
3989 team->t.t_implicit_task_taskdata[tid].td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003990}
3991#endif // KMP_DEBUG
3992
Jonathan Peyton30419822017-05-12 18:01:32 +00003993/* TODO optimize with one big memclr, take out what isn't needed, split
3994 responsibility to workers as much as possible, and delay initialization of
3995 features as much as possible */
3996static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
3997 int tid, int gtid) {
3998 /* this_thr->th.th_info.ds.ds_gtid is setup in
3999 kmp_allocate_thread/create_worker.
4000 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4001 kmp_info_t *master = team->t.t_threads[0];
4002 KMP_DEBUG_ASSERT(this_thr != NULL);
4003 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4004 KMP_DEBUG_ASSERT(team);
4005 KMP_DEBUG_ASSERT(team->t.t_threads);
4006 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4007 KMP_DEBUG_ASSERT(master);
4008 KMP_DEBUG_ASSERT(master->th.th_root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004009
Jonathan Peyton30419822017-05-12 18:01:32 +00004010 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004011
Jonathan Peyton30419822017-05-12 18:01:32 +00004012 TCW_SYNC_PTR(this_thr->th.th_team, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004013
Jonathan Peyton30419822017-05-12 18:01:32 +00004014 this_thr->th.th_info.ds.ds_tid = tid;
4015 this_thr->th.th_set_nproc = 0;
4016 if (__kmp_tasking_mode != tskm_immediate_exec)
4017 // When tasking is possible, threads are not safe to reap until they are
4018 // done tasking; this will be set when tasking code is exited in wait
4019 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4020 else // no tasking --> always safe to reap
4021 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004022#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004023 this_thr->th.th_set_proc_bind = proc_bind_default;
4024#if KMP_AFFINITY_SUPPORTED
4025 this_thr->th.th_new_place = this_thr->th.th_current_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004026#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004027#endif
4028 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004029
Jonathan Peyton30419822017-05-12 18:01:32 +00004030 /* setup the thread's cache of the team structure */
4031 this_thr->th.th_team_nproc = team->t.t_nproc;
4032 this_thr->th.th_team_master = master;
4033 this_thr->th.th_team_serialized = team->t.t_serialized;
4034 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004035
Jonathan Peyton30419822017-05-12 18:01:32 +00004036 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004037
Jonathan Peyton30419822017-05-12 18:01:32 +00004038 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4039 tid, gtid, this_thr, this_thr->th.th_current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004040
Jonathan Peyton30419822017-05-12 18:01:32 +00004041 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4042 team, tid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004043
Jonathan Peyton30419822017-05-12 18:01:32 +00004044 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4045 tid, gtid, this_thr, this_thr->th.th_current_task));
4046 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4047 // __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004048
Jonathan Peyton30419822017-05-12 18:01:32 +00004049 /* TODO no worksharing in speculative threads */
4050 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004051
Jonathan Peyton30419822017-05-12 18:01:32 +00004052 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004053
4054#ifdef BUILD_TV
Jonathan Peyton30419822017-05-12 18:01:32 +00004055 this_thr->th.th_local.tv_data = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004056#endif
4057
Jonathan Peyton30419822017-05-12 18:01:32 +00004058 if (!this_thr->th.th_pri_common) {
4059 this_thr->th.th_pri_common =
4060 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4061 if (__kmp_storage_map) {
4062 __kmp_print_storage_map_gtid(
4063 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4064 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004065 }; // if
Jonathan Peyton30419822017-05-12 18:01:32 +00004066 this_thr->th.th_pri_head = NULL;
4067 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00004068
Jonathan Peyton30419822017-05-12 18:01:32 +00004069 /* Initialize dynamic dispatch */
4070 {
4071 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4072 // Use team max_nproc since this will never change for the team.
4073 size_t disp_size =
4074 sizeof(dispatch_private_info_t) *
4075 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4076 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4077 team->t.t_max_nproc));
4078 KMP_ASSERT(dispatch);
4079 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4080 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004081
Jonathan Peyton30419822017-05-12 18:01:32 +00004082 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004083#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004084 dispatch->th_doacross_buf_idx = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004085#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004086 if (!dispatch->th_disp_buffer) {
4087 dispatch->th_disp_buffer =
4088 (dispatch_private_info_t *)__kmp_allocate(disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004089
Jonathan Peyton30419822017-05-12 18:01:32 +00004090 if (__kmp_storage_map) {
4091 __kmp_print_storage_map_gtid(
4092 gtid, &dispatch->th_disp_buffer[0],
4093 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4094 ? 1
4095 : __kmp_dispatch_num_buffers],
4096 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4097 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4098 gtid, team->t.t_id, gtid);
4099 }
4100 } else {
4101 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004102 }
4103
Jonathan Peyton30419822017-05-12 18:01:32 +00004104 dispatch->th_dispatch_pr_current = 0;
4105 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004106
Jonathan Peyton30419822017-05-12 18:01:32 +00004107 dispatch->th_deo_fcn = 0; /* ORDERED */
4108 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4109 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004110
Jonathan Peyton30419822017-05-12 18:01:32 +00004111 this_thr->th.th_next_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004112
Jonathan Peyton30419822017-05-12 18:01:32 +00004113 if (!this_thr->th.th_task_state_memo_stack) {
4114 size_t i;
4115 this_thr->th.th_task_state_memo_stack =
4116 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4117 this_thr->th.th_task_state_top = 0;
4118 this_thr->th.th_task_state_stack_sz = 4;
4119 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4120 ++i) // zero init the stack
4121 this_thr->th.th_task_state_memo_stack[i] = 0;
4122 }
4123
4124 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4125 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4126
4127 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004128}
4129
Jonathan Peyton30419822017-05-12 18:01:32 +00004130/* allocate a new thread for the requesting team. this is only called from
4131 within a forkjoin critical section. we will first try to get an available
4132 thread from the thread pool. if none is available, we will fork a new one
4133 assuming we are able to create a new one. this should be assured, as the
4134 caller should check on this first. */
4135kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4136 int new_tid) {
4137 kmp_team_t *serial_team;
4138 kmp_info_t *new_thr;
4139 int new_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004140
Jonathan Peyton30419822017-05-12 18:01:32 +00004141 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4142 KMP_DEBUG_ASSERT(root && team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004143#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004144 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004145#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004146 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004147
Jonathan Peyton30419822017-05-12 18:01:32 +00004148 /* first, try to get one from the thread pool */
4149 if (__kmp_thread_pool) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004150
Jonathan Peyton30419822017-05-12 18:01:32 +00004151 new_thr = (kmp_info_t *)__kmp_thread_pool;
4152 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4153 if (new_thr == __kmp_thread_pool_insert_pt) {
4154 __kmp_thread_pool_insert_pt = NULL;
4155 }
4156 TCW_4(new_thr->th.th_in_pool, FALSE);
4157 // Don't touch th_active_in_pool or th_active.
4158 // The worker thread adjusts those flags as it sleeps/awakens.
4159 __kmp_thread_pool_nth--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004160
Jonathan Peyton30419822017-05-12 18:01:32 +00004161 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4162 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4163 KMP_ASSERT(!new_thr->th.th_team);
4164 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4165 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004166
Jonathan Peyton30419822017-05-12 18:01:32 +00004167 /* setup the thread structure */
4168 __kmp_initialize_info(new_thr, team, new_tid,
4169 new_thr->th.th_info.ds.ds_gtid);
4170 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004171
Jonathan Peyton30419822017-05-12 18:01:32 +00004172 TCW_4(__kmp_nth, __kmp_nth + 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004173
Jonathan Peyton30419822017-05-12 18:01:32 +00004174 new_thr->th.th_task_state = 0;
4175 new_thr->th.th_task_state_top = 0;
4176 new_thr->th.th_task_state_stack_sz = 4;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004177
Jim Cownie5e8470a2013-09-27 10:38:44 +00004178#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00004179 /* Adjust blocktime back to zero if necessary */
4180 /* Middle initialization might not have occurred yet */
4181 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4182 if (__kmp_nth > __kmp_avail_proc) {
4183 __kmp_zero_bt = TRUE;
4184 }
4185 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004186#endif /* KMP_ADJUST_BLOCKTIME */
4187
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004188#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004189 // If the thread entered the pool via __kmp_free_thread, wait_flag should
4190 // not be KMP_BARRIER_PARENT_FLAG.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004191 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004192 kmp_balign_t *balign = new_thr->th.th_bar;
4193 for (b = 0; b < bs_last_barrier; ++b)
4194 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004195#endif
4196
Jonathan Peyton30419822017-05-12 18:01:32 +00004197 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4198 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004199
Jim Cownie5e8470a2013-09-27 10:38:44 +00004200 KMP_MB();
4201 return new_thr;
Jonathan Peyton30419822017-05-12 18:01:32 +00004202 }
4203
4204 /* no, we'll fork a new one */
4205 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4206 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4207
4208#if KMP_USE_MONITOR
4209 // If this is the first worker thread the RTL is creating, then also
4210 // launch the monitor thread. We try to do this as early as possible.
4211 if (!TCR_4(__kmp_init_monitor)) {
4212 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4213 if (!TCR_4(__kmp_init_monitor)) {
4214 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4215 TCW_4(__kmp_init_monitor, 1);
4216 __kmp_create_monitor(&__kmp_monitor);
4217 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4218#if KMP_OS_WINDOWS
4219 // AC: wait until monitor has started. This is a fix for CQ232808.
4220 // The reason is that if the library is loaded/unloaded in a loop with
4221 // small (parallel) work in between, then there is a high probability that
4222 // the monitor thread starts only after the library shutdown. At shutdown
4223 // it is too late to cope with the problem, because when the master is in
4224 // DllMain (process detach) the monitor has no chance to start (it is
4225 // blocked), and the master has no means to inform the monitor that the
4226 // library has gone, because all the memory which the monitor can access
4227 // is going to be released/reset.
4228 while (TCR_4(__kmp_init_monitor) < 2) {
4229 KMP_YIELD(TRUE);
4230 }
4231 KF_TRACE(10, ("after monitor thread has started\n"));
4232#endif
4233 }
4234 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4235 }
4236#endif
4237
4238 KMP_MB();
4239 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4240 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4241 }
4242
4243 /* allocate space for it. */
4244 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4245
4246 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4247
4248 if (__kmp_storage_map) {
4249 __kmp_print_thread_storage_map(new_thr, new_gtid);
4250 }
4251
4252 // add the reserve serialized team, initialized from the team's master thread
4253 {
4254 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4255 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4256 new_thr->th.th_serial_team = serial_team =
4257 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4258#if OMPT_SUPPORT
4259 0, // root parallel id
4260#endif
4261#if OMP_40_ENABLED
4262 proc_bind_default,
4263#endif
4264 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4265 }
4266 KMP_ASSERT(serial_team);
4267 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4268 // execution (it is unused for now).
4269 serial_team->t.t_threads[0] = new_thr;
4270 KF_TRACE(10,
4271 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4272 new_thr));
4273
4274 /* setup the thread structures */
4275 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4276
4277#if USE_FAST_MEMORY
4278 __kmp_initialize_fast_memory(new_thr);
4279#endif /* USE_FAST_MEMORY */
4280
4281#if KMP_USE_BGET
4282 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4283 __kmp_initialize_bget(new_thr);
4284#endif
4285
4286 __kmp_init_random(new_thr); // Initialize random number generator
4287
4288 /* Initialize these only once when thread is grabbed for a team allocation */
4289 KA_TRACE(20,
4290 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4291 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4292
4293 int b;
4294 kmp_balign_t *balign = new_thr->th.th_bar;
4295 for (b = 0; b < bs_last_barrier; ++b) {
4296 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4297 balign[b].bb.team = NULL;
4298 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4299 balign[b].bb.use_oncore_barrier = 0;
4300 }
4301
4302 new_thr->th.th_spin_here = FALSE;
4303 new_thr->th.th_next_waiting = 0;
4304
4305#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4306 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4307 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4308 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4309 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4310#endif
4311
4312 TCW_4(new_thr->th.th_in_pool, FALSE);
4313 new_thr->th.th_active_in_pool = FALSE;
4314 TCW_4(new_thr->th.th_active, TRUE);
4315
4316 /* adjust the global counters */
4317 __kmp_all_nth++;
4318 __kmp_nth++;
4319
4320 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4321 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4322 if (__kmp_adjust_gtid_mode) {
4323 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4324 if (TCR_4(__kmp_gtid_mode) != 2) {
4325 TCW_4(__kmp_gtid_mode, 2);
4326 }
4327 } else {
4328 if (TCR_4(__kmp_gtid_mode) != 1) {
4329 TCW_4(__kmp_gtid_mode, 1);
4330 }
4331 }
4332 }
4333
4334#ifdef KMP_ADJUST_BLOCKTIME
4335 /* Adjust blocktime back to zero if necessary */
4336 /* Middle initialization might not have occurred yet */
4337 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4338 if (__kmp_nth > __kmp_avail_proc) {
4339 __kmp_zero_bt = TRUE;
4340 }
4341 }
4342#endif /* KMP_ADJUST_BLOCKTIME */
4343
4344 /* actually fork it and create the new worker thread */
4345 KF_TRACE(
4346 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4347 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4348 KF_TRACE(10,
4349 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4350
4351 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4352 new_gtid));
4353 KMP_MB();
4354 return new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004355}
4356
Jonathan Peyton30419822017-05-12 18:01:32 +00004357/* Reinitialize team for reuse.
4358 The hot team code calls this routine at every fork barrier, so EPCC barrier
4359 tests are extremely sensitive to changes in it, esp. writes to the team
4360 struct, which cause a cache invalidation in all threads.
4361 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4362static void __kmp_reinitialize_team(kmp_team_t *team,
4363 kmp_internal_control_t *new_icvs,
4364 ident_t *loc) {
4365 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4366 team->t.t_threads[0], team));
4367 KMP_DEBUG_ASSERT(team && new_icvs);
4368 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4369 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004370
Jonathan Peyton30419822017-05-12 18:01:32 +00004371 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jonathan Peyton30419822017-05-12 18:01:32 +00004372 // Copy ICVs to the master thread's implicit taskdata
4373 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4374 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004375
Jonathan Peyton30419822017-05-12 18:01:32 +00004376 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4377 team->t.t_threads[0], team));
Jim Cownie181b4bb2013-12-23 17:28:57 +00004378}
4379
Jonathan Peyton30419822017-05-12 18:01:32 +00004380/* Initialize the team data structure.
4381 This assumes the t_threads and t_max_nproc are already set.
4382 Also, we don't touch the arguments */
4383static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4384 kmp_internal_control_t *new_icvs,
4385 ident_t *loc) {
4386 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004387
Jonathan Peyton30419822017-05-12 18:01:32 +00004388 /* verify */
4389 KMP_DEBUG_ASSERT(team);
4390 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4391 KMP_DEBUG_ASSERT(team->t.t_threads);
4392 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004393
Jonathan Peyton30419822017-05-12 18:01:32 +00004394 team->t.t_master_tid = 0; /* not needed */
4395 /* team->t.t_master_bar; not needed */
4396 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4397 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004398
Jonathan Peyton30419822017-05-12 18:01:32 +00004399 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4400 team->t.t_next_pool = NULL;
4401 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4402 * up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004403
Jonathan Peyton30419822017-05-12 18:01:32 +00004404 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4405 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004406
Jonathan Peyton30419822017-05-12 18:01:32 +00004407 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4408 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004409
4410#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00004411 team->t.t_fp_control_saved = FALSE; /* not needed */
4412 team->t.t_x87_fpu_control_word = 0; /* not needed */
4413 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004414#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4415
Jonathan Peyton30419822017-05-12 18:01:32 +00004416 team->t.t_construct = 0;
4417 __kmp_init_lock(&team->t.t_single_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004418
Jonathan Peyton30419822017-05-12 18:01:32 +00004419 team->t.t_ordered.dt.t_value = 0;
4420 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004421
Jonathan Peyton30419822017-05-12 18:01:32 +00004422 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004423
4424#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004425 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004426#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004427 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004428
Jonathan Peyton30419822017-05-12 18:01:32 +00004429 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004430
Jonathan Peyton30419822017-05-12 18:01:32 +00004431 __kmp_reinitialize_team(team, new_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004432
Jonathan Peyton30419822017-05-12 18:01:32 +00004433 KMP_MB();
4434 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004435}
4436
Alp Toker98758b02014-03-02 04:12:06 +00004437#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004438/* Sets the full mask for the thread, saving the old mask in *old_mask; no changes to structures. */
4439static void
Jonathan Peyton30419822017-05-12 18:01:32 +00004440__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4441 if (KMP_AFFINITY_CAPABLE()) {
4442 int status;
4443 if (old_mask != NULL) {
4444 status = __kmp_get_system_affinity(old_mask, TRUE);
4445 int error = errno;
4446 if (status != 0) {
4447 __kmp_msg(kmp_ms_fatal, KMP_MSG(ChangeThreadAffMaskError),
4448 KMP_ERR(error), __kmp_msg_null);
4449 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004450 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004451 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4452 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004453}
4454#endif
4455
Alp Toker98758b02014-03-02 04:12:06 +00004456#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004457
Jim Cownie5e8470a2013-09-27 10:38:44 +00004458// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4459// It calculats the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004460// thread's partition, and binds each worker to a thread in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004461// The master thread's partition should already include its current binding.
Jonathan Peyton30419822017-05-12 18:01:32 +00004462static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4463 // Copy the master thread's place partition to the team struct
4464 kmp_info_t *master_th = team->t.t_threads[0];
4465 KMP_DEBUG_ASSERT(master_th != NULL);
4466 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4467 int first_place = master_th->th.th_first_place;
4468 int last_place = master_th->th.th_last_place;
4469 int masters_place = master_th->th.th_current_place;
4470 team->t.t_first_place = first_place;
4471 team->t.t_last_place = last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004472
Jonathan Peyton30419822017-05-12 18:01:32 +00004473 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4474 "bound to place %d partition = [%d,%d]\n",
4475 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4476 team->t.t_id, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004477
Jonathan Peyton30419822017-05-12 18:01:32 +00004478 switch (proc_bind) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004479
Jonathan Peyton30419822017-05-12 18:01:32 +00004480 case proc_bind_default:
4481 // serial teams might have the proc_bind policy set to proc_bind_default. It
4482 // doesn't matter, as we don't rebind the master thread for any proc_bind policy
4483 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4484 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004485
Jonathan Peyton30419822017-05-12 18:01:32 +00004486 case proc_bind_master: {
4487 int f;
4488 int n_th = team->t.t_nproc;
4489 for (f = 1; f < n_th; f++) {
4490 kmp_info_t *th = team->t.t_threads[f];
4491 KMP_DEBUG_ASSERT(th != NULL);
4492 th->th.th_first_place = first_place;
4493 th->th.th_last_place = last_place;
4494 th->th.th_new_place = masters_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004495
Jonathan Peyton30419822017-05-12 18:01:32 +00004496 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
4497 "partition = [%d,%d]\n",
4498 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4499 f, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004500 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004501 } break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004502
Jonathan Peyton30419822017-05-12 18:01:32 +00004503 case proc_bind_close: {
4504 int f;
4505 int n_th = team->t.t_nproc;
4506 int n_places;
4507 if (first_place <= last_place) {
4508 n_places = last_place - first_place + 1;
4509 } else {
4510 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4511 }
4512 if (n_th <= n_places) {
4513 int place = masters_place;
4514 for (f = 1; f < n_th; f++) {
4515 kmp_info_t *th = team->t.t_threads[f];
4516 KMP_DEBUG_ASSERT(th != NULL);
4517
4518 if (place == last_place) {
4519 place = first_place;
4520 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4521 place = 0;
4522 } else {
4523 place++;
4524 }
4525 th->th.th_first_place = first_place;
4526 th->th.th_last_place = last_place;
4527 th->th.th_new_place = place;
4528
4529 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4530 "partition = [%d,%d]\n",
4531 __kmp_gtid_from_thread(team->t.t_threads[f]),
4532 team->t.t_id, f, place, first_place, last_place));
4533 }
4534 } else {
4535 int S, rem, gap, s_count;
4536 S = n_th / n_places;
4537 s_count = 0;
4538 rem = n_th - (S * n_places);
4539 gap = rem > 0 ? n_places / rem : n_places;
4540 int place = masters_place;
4541 int gap_ct = gap;
4542 for (f = 0; f < n_th; f++) {
4543 kmp_info_t *th = team->t.t_threads[f];
4544 KMP_DEBUG_ASSERT(th != NULL);
4545
4546 th->th.th_first_place = first_place;
4547 th->th.th_last_place = last_place;
4548 th->th.th_new_place = place;
4549 s_count++;
4550
4551 if ((s_count == S) && rem && (gap_ct == gap)) {
4552 // do nothing, add an extra thread to place on next iteration
4553 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4554 // we added an extra thread to this place; move to next place
4555 if (place == last_place) {
4556 place = first_place;
4557 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4558 place = 0;
4559 } else {
4560 place++;
4561 }
4562 s_count = 0;
4563 gap_ct = 1;
4564 rem--;
4565 } else if (s_count == S) { // place full; don't add extra
4566 if (place == last_place) {
4567 place = first_place;
4568 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4569 place = 0;
4570 } else {
4571 place++;
4572 }
4573 gap_ct++;
4574 s_count = 0;
4575 }
4576
4577 KA_TRACE(100,
4578 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4579 "partition = [%d,%d]\n",
4580 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4581 th->th.th_new_place, first_place, last_place));
4582 }
4583 KMP_DEBUG_ASSERT(place == masters_place);
4584 }
4585 } break;
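  // Worked example for the oversubscribed branch above (illustrative sizes):
  // n_th = 5 threads over n_places = 3 places [0..2] with the master on place
  // 0 gives S = 1, rem = 2, gap = 1, so threads 0,1 land on place 0, threads
  // 2,3 on place 1 and thread 4 on place 2, with `place` wrapping back to the
  // master's place by the time the loop ends.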
4586
4587 case proc_bind_spread: {
4588 int f;
4589 int n_th = team->t.t_nproc;
4590 int n_places;
4591 int thidx;
4592 if (first_place <= last_place) {
4593 n_places = last_place - first_place + 1;
4594 } else {
4595 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4596 }
4597 if (n_th <= n_places) {
4598 int place = masters_place;
4599 int S = n_places / n_th;
4600 int s_count, rem, gap, gap_ct;
4601 rem = n_places - n_th * S;
4602 gap = rem ? n_th / rem : 1;
4603 gap_ct = gap;
4604 thidx = n_th;
4605 if (update_master_only == 1)
4606 thidx = 1;
4607 for (f = 0; f < thidx; f++) {
4608 kmp_info_t *th = team->t.t_threads[f];
4609 KMP_DEBUG_ASSERT(th != NULL);
4610
4611 th->th.th_first_place = place;
4612 th->th.th_new_place = place;
4613 s_count = 1;
4614 while (s_count < S) {
4615 if (place == last_place) {
4616 place = first_place;
4617 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4618 place = 0;
4619 } else {
4620 place++;
4621 }
4622 s_count++;
4623 }
4624 if (rem && (gap_ct == gap)) {
4625 if (place == last_place) {
4626 place = first_place;
4627 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4628 place = 0;
4629 } else {
4630 place++;
4631 }
4632 rem--;
4633 gap_ct = 0;
4634 }
4635 th->th.th_last_place = place;
4636 gap_ct++;
4637
4638 if (place == last_place) {
4639 place = first_place;
4640 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4641 place = 0;
4642 } else {
4643 place++;
4644 }
4645
4646 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4647 "partition = [%d,%d]\n",
4648 __kmp_gtid_from_thread(team->t.t_threads[f]),
4649 team->t.t_id, f, th->th.th_new_place,
4650 th->th.th_first_place, th->th.th_last_place));
4651 }
4652 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4653 } else {
4654 int S, rem, gap, s_count;
4655 S = n_th / n_places;
4656 s_count = 0;
4657 rem = n_th - (S * n_places);
4658 gap = rem > 0 ? n_places / rem : n_places;
4659 int place = masters_place;
4660 int gap_ct = gap;
4661 thidx = n_th;
4662 if (update_master_only == 1)
4663 thidx = 1;
4664 for (f = 0; f < thidx; f++) {
4665 kmp_info_t *th = team->t.t_threads[f];
4666 KMP_DEBUG_ASSERT(th != NULL);
4667
4668 th->th.th_first_place = place;
4669 th->th.th_last_place = place;
4670 th->th.th_new_place = place;
4671 s_count++;
4672
4673 if ((s_count == S) && rem && (gap_ct == gap)) {
4674 // do nothing, an extra thread will be added to this place on the next iteration
4675 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4676 // we added an extra thread to this place; move on to next place
4677 if (place == last_place) {
4678 place = first_place;
4679 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4680 place = 0;
4681 } else {
4682 place++;
4683 }
4684 s_count = 0;
4685 gap_ct = 1;
4686 rem--;
4687 } else if (s_count == S) { // place is full; don't add extra thread
4688 if (place == last_place) {
4689 place = first_place;
4690 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4691 place = 0;
4692 } else {
4693 place++;
4694 }
4695 gap_ct++;
4696 s_count = 0;
4697 }
4698
4699 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4700 "partition = [%d,%d]\n",
4701 __kmp_gtid_from_thread(team->t.t_threads[f]),
4702 team->t.t_id, f, th->th.th_new_place,
4703 th->th.th_first_place, th->th.th_last_place));
4704 }
4705 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4706 }
4707 } break;
4708
4709 default:
4710 break;
4711 }
4712
4713 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004714}
4715
Alp Toker98758b02014-03-02 04:12:06 +00004716#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
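// Illustrative sketch (ours, not part of libomp; kept under "#if 0" so it is
// never compiled): the proc_bind_close/spread overflow cases above hand out
// threads to places in blocks of S = n_th / n_places, spreading the
// rem = n_th % n_places leftover threads one per "gap" places. The helper
// below reproduces that arithmetic on a simple 0..n_places-1 ring (the real
// code walks from masters_place and honors the first_place/last_place wrap).
#if 0
#include <vector>

static std::vector<int> sketch_partition(int n_th, int n_places) {
  std::vector<int> count(n_places, 0);
  int S = n_th / n_places; // base number of threads per place
  int rem = n_th - S * n_places; // leftover threads to spread out
  int gap = rem > 0 ? n_places / rem : n_places;
  int place = 0, s_count = 0, gap_ct = gap;
  for (int f = 0; f < n_th; ++f) {
    count[place]++;
    s_count++;
    if (s_count == S + 1 && rem && gap_ct == gap) {
      place = (place + 1) % n_places; // this place absorbed an extra thread
      s_count = 0;
      gap_ct = 1;
      rem--;
    } else if (s_count == S && !(rem && gap_ct == gap)) {
      place = (place + 1) % n_places; // place full, no extra thread due here
      gap_ct++;
      s_count = 0;
    }
  }
  return count; // e.g. n_th=10, n_places=4 yields {3, 2, 3, 2}
}
#endif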
Jim Cownie5e8470a2013-09-27 10:38:44 +00004717
Jonathan Peyton30419822017-05-12 18:01:32 +00004718/* allocate a new team data structure to use. take one off of the free pool if
4719 available */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004720kmp_team_t *
Jonathan Peyton30419822017-05-12 18:01:32 +00004721__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004722#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00004723 ompt_parallel_id_t ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004724#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004725#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004726 kmp_proc_bind_t new_proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00004727#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004728 kmp_internal_control_t *new_icvs,
4729 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4730 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4731 int f;
4732 kmp_team_t *team;
4733 int use_hot_team = !root->r.r_active;
4734 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004735
Jonathan Peyton30419822017-05-12 18:01:32 +00004736 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4737 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4738 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4739 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004740
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004741#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004742 kmp_hot_team_ptr_t *hot_teams;
4743 if (master) {
4744 team = master->th.th_team;
4745 level = team->t.t_active_level;
4746 if (master->th.th_teams_microtask) { // in teams construct?
4747 if (master->th.th_teams_size.nteams > 1 &&
4748 ( // #teams > 1
4749 team->t.t_pkfn ==
4750 (microtask_t)__kmp_teams_master || // inner fork of the teams
4751 master->th.th_teams_level <
4752 team->t.t_level)) { // or nested parallel inside the teams
4753 ++level; // not increment if #teams==1, or for outer fork of the teams;
4754 // increment otherwise
4755 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004756 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004757 hot_teams = master->th.th_hot_teams;
4758 if (level < __kmp_hot_teams_max_level && hot_teams &&
4759 hot_teams[level]
4760 .hot_team) { // hot team has already been allocated for given level
4761 use_hot_team = 1;
4762 } else {
4763 use_hot_team = 0;
4764 }
4765 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004766#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004767 // Optimization to use a "hot" team
4768 if (use_hot_team && new_nproc > 1) {
4769 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004770#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004771 team = hot_teams[level].hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004772#else
Jonathan Peyton30419822017-05-12 18:01:32 +00004773 team = root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004774#endif
4775#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004776 if (__kmp_tasking_mode != tskm_immediate_exec) {
4777 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
4778 "task_team[1] = %p before reinit\n",
4779 team->t.t_task_team[0], team->t.t_task_team[1]));
4780 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004781#endif
4782
Jonathan Peyton30419822017-05-12 18:01:32 +00004783 // Has the number of threads changed?
4784 /* Let's assume the most common case is that the number of threads is
4785 unchanged, and put that case first. */
4786 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4787 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
4788 // This case can mean that omp_set_num_threads() was called and the hot
Jonathan Peyton642688b2017-06-01 16:46:36 +00004789 // team size was already reduced, so we check the special flag
Jonathan Peyton30419822017-05-12 18:01:32 +00004790 if (team->t.t_size_changed == -1) {
4791 team->t.t_size_changed = 1;
4792 } else {
4793 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4794 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004795
Jonathan Peyton30419822017-05-12 18:01:32 +00004796 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4797 kmp_r_sched_t new_sched = new_icvs->sched;
4798 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4799 team->t.t_sched.chunk != new_sched.chunk)
4800 team->t.t_sched =
4801 new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004802
Jonathan Peyton30419822017-05-12 18:01:32 +00004803 __kmp_reinitialize_team(team, new_icvs,
4804 root->r.r_uber_thread->th.th_ident);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004805
Jonathan Peyton30419822017-05-12 18:01:32 +00004806 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4807 team->t.t_threads[0], team));
4808 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004809
4810#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004811#if KMP_AFFINITY_SUPPORTED
4812 if ((team->t.t_size_changed == 0) &&
4813 (team->t.t_proc_bind == new_proc_bind)) {
4814 if (new_proc_bind == proc_bind_spread) {
4815 __kmp_partition_places(
4816 team, 1); // add flag to update only master for spread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004817 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004818 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
4819 "proc_bind = %d, partition = [%d,%d]\n",
4820 team->t.t_id, new_proc_bind, team->t.t_first_place,
4821 team->t.t_last_place));
4822 } else {
4823 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4824 __kmp_partition_places(team);
4825 }
4826#else
4827 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4828#endif /* KMP_AFFINITY_SUPPORTED */
4829#endif /* OMP_40_ENABLED */
4830 } else if (team->t.t_nproc > new_nproc) {
4831 KA_TRACE(20,
4832 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
4833 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004834
Jonathan Peyton30419822017-05-12 18:01:32 +00004835 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004836#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004837 if (__kmp_hot_teams_mode == 0) {
4838 // AC: saved number of threads should correspond to team's value in this
4839 // mode, can be bigger in mode 1, when hot team has threads in reserve
4840 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4841 hot_teams[level].hot_team_nth = new_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004842#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004843 /* release the extra threads we don't need any more */
4844 for (f = new_nproc; f < team->t.t_nproc; f++) {
4845 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4846 if (__kmp_tasking_mode != tskm_immediate_exec) {
4847 // When decreasing team size, threads no longer in the team should
4848 // unref task team.
4849 team->t.t_threads[f]->th.th_task_team = NULL;
4850 }
4851 __kmp_free_thread(team->t.t_threads[f]);
4852 team->t.t_threads[f] = NULL;
4853 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004854#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004855 } // (__kmp_hot_teams_mode == 0)
4856 else {
4857 // When keeping extra threads in team, switch threads to wait on own
4858 // b_go flag
4859 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4860 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4861 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4862 for (int b = 0; b < bs_last_barrier; ++b) {
4863 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4864 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004865 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004866 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4867 }
4868 }
4869 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004870#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004871 team->t.t_nproc = new_nproc;
4872 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4873 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4874 team->t.t_sched.chunk != new_icvs->sched.chunk)
4875 team->t.t_sched = new_icvs->sched;
4876 __kmp_reinitialize_team(team, new_icvs,
4877 root->r.r_uber_thread->th.th_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004878
Jonathan Peyton30419822017-05-12 18:01:32 +00004879 /* update the remaining threads */
4880 for (f = 0; f < new_nproc; ++f) {
4881 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4882 }
4883 // restore the current task state of the master thread: should be the
4884 // implicit task
4885 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
4886 team->t.t_threads[0], team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004887
Jonathan Peyton30419822017-05-12 18:01:32 +00004888 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004889
4890#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004891 for (f = 0; f < team->t.t_nproc; f++) {
4892 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
4893 team->t.t_threads[f]->th.th_team_nproc ==
4894 team->t.t_nproc);
4895 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004896#endif
4897
4898#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004899 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4900#if KMP_AFFINITY_SUPPORTED
4901 __kmp_partition_places(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004902#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004903#endif
4904 } else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004905#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00004906 kmp_affin_mask_t *old_mask;
4907 if (KMP_AFFINITY_CAPABLE()) {
4908 KMP_CPU_ALLOC(old_mask);
4909 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004910#endif
4911
Jonathan Peyton30419822017-05-12 18:01:32 +00004912 KA_TRACE(20,
4913 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
4914 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004915
Jonathan Peyton30419822017-05-12 18:01:32 +00004916 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004917
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004918#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004919 int avail_threads = hot_teams[level].hot_team_nth;
4920 if (new_nproc < avail_threads)
4921 avail_threads = new_nproc;
4922 kmp_info_t **other_threads = team->t.t_threads;
4923 for (f = team->t.t_nproc; f < avail_threads; ++f) {
4924 // Adjust barrier data of reserved threads (if any) of the team
4925 // Other data will be set in __kmp_initialize_info() below.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004926 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004927 kmp_balign_t *balign = other_threads[f]->th.th_bar;
4928 for (b = 0; b < bs_last_barrier; ++b) {
4929 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4930 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004931#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00004932 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004933#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004934 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004935 }
4936 if (hot_teams[level].hot_team_nth >= new_nproc) {
4937 // we have all needed threads in reserve, no need to allocate any
4938 // this is only possible in mode 1; there can be no reserved threads in mode 0
4939 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4940 team->t.t_nproc = new_nproc; // just get reserved threads involved
4941 } else {
4942 // we may have some threads in reserve, but not enough
4943 team->t.t_nproc =
4944 hot_teams[level]
4945 .hot_team_nth; // get reserved threads involved if any
4946 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4947#endif // KMP_NESTED_HOT_TEAMS
4948 if (team->t.t_max_nproc < new_nproc) {
4949 /* reallocate larger arrays */
4950 __kmp_reallocate_team_arrays(team, new_nproc);
4951 __kmp_reinitialize_team(team, new_icvs, NULL);
4952 }
4953
4954#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4955 /* Temporarily set full mask for master thread before creation of
4956 workers. The reason is that workers inherit the affinity from master,
4957 so if a lot of workers are created on the single core quickly, they
4958 don't get a chance to set their own affinity for a long time. */
4959 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
4960#endif
4961
4962 /* allocate new threads for the hot team */
4963 for (f = team->t.t_nproc; f < new_nproc; f++) {
4964 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
4965 KMP_DEBUG_ASSERT(new_worker);
4966 team->t.t_threads[f] = new_worker;
4967
4968 KA_TRACE(20,
4969 ("__kmp_allocate_team: team %d init T#%d arrived: "
4970 "join=%llu, plain=%llu\n",
4971 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
4972 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4973 team->t.t_bar[bs_plain_barrier].b_arrived));
4974
4975 { // Initialize barrier data for new threads.
4976 int b;
4977 kmp_balign_t *balign = new_worker->th.th_bar;
4978 for (b = 0; b < bs_last_barrier; ++b) {
4979 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4980 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
4981 KMP_BARRIER_PARENT_FLAG);
4982#if USE_DEBUGGER
4983 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4984#endif
4985 }
4986 }
4987 }
4988
4989#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4990 if (KMP_AFFINITY_CAPABLE()) {
4991 /* Restore initial master thread's affinity mask */
4992 __kmp_set_system_affinity(old_mask, TRUE);
4993 KMP_CPU_FREE(old_mask);
4994 }
4995#endif
4996#if KMP_NESTED_HOT_TEAMS
4997 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
4998#endif // KMP_NESTED_HOT_TEAMS
4999 /* make sure everyone is synchronized */
5000 int old_nproc = team->t.t_nproc; // save old value and use to update only
5001 // new threads below
5002 __kmp_initialize_team(team, new_nproc, new_icvs,
5003 root->r.r_uber_thread->th.th_ident);
5004
5005 /* reinitialize the threads */
5006 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5007 for (f = 0; f < team->t.t_nproc; ++f)
5008 __kmp_initialize_info(team->t.t_threads[f], team, f,
5009 __kmp_gtid_from_tid(f, team));
5010 if (level) { // set th_task_state for new threads in nested hot team
5011 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5012 // only need to set the th_task_state for the new threads. th_task_state
5013 // for master thread will not be accurate until after this in
5014 // __kmp_fork_call(), so we look to the master's memo_stack to get the
5015 // correct value.
5016 for (f = old_nproc; f < team->t.t_nproc; ++f)
5017 team->t.t_threads[f]->th.th_task_state =
5018 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5019 } else { // set th_task_state for new threads in non-nested hot team
5020 int old_state =
5021 team->t.t_threads[0]->th.th_task_state; // copy master's state
5022 for (f = old_nproc; f < team->t.t_nproc; ++f)
5023 team->t.t_threads[f]->th.th_task_state = old_state;
5024 }
5025
5026#ifdef KMP_DEBUG
5027 for (f = 0; f < team->t.t_nproc; ++f) {
5028 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5029 team->t.t_threads[f]->th.th_team_nproc ==
5030 team->t.t_nproc);
5031 }
5032#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005033
5034#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005035 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5036#if KMP_AFFINITY_SUPPORTED
5037 __kmp_partition_places(team);
5038#endif
5039#endif
5040 } // Check changes in number of threads
5041
5042#if OMP_40_ENABLED
5043 kmp_info_t *master = team->t.t_threads[0];
5044 if (master->th.th_teams_microtask) {
5045 for (f = 1; f < new_nproc; ++f) {
5046 // propagate teams construct specific info to workers
5047 kmp_info_t *thr = team->t.t_threads[f];
5048 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5049 thr->th.th_teams_level = master->th.th_teams_level;
5050 thr->th.th_teams_size = master->th.th_teams_size;
5051 }
5052 }
5053#endif /* OMP_40_ENABLED */
5054#if KMP_NESTED_HOT_TEAMS
5055 if (level) {
5056 // Sync barrier state for nested hot teams, not needed for outermost hot
5057 // team.
5058 for (f = 1; f < new_nproc; ++f) {
5059 kmp_info_t *thr = team->t.t_threads[f];
5060 int b;
5061 kmp_balign_t *balign = thr->th.th_bar;
5062 for (b = 0; b < bs_last_barrier; ++b) {
5063 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5064 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5065#if USE_DEBUGGER
5066 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5067#endif
5068 }
5069 }
5070 }
5071#endif // KMP_NESTED_HOT_TEAMS
5072
5073 /* reallocate space for arguments if necessary */
5074 __kmp_alloc_argv_entries(argc, team, TRUE);
5075 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5076 // The hot team re-uses the previous task team,
5077 // if untouched during the previous release->gather phase.
5078
5079 KF_TRACE(10, (" hot_team = %p\n", team));
5080
5081#if KMP_DEBUG
5082 if (__kmp_tasking_mode != tskm_immediate_exec) {
5083 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5084 "task_team[1] = %p after reinit\n",
5085 team->t.t_task_team[0], team->t.t_task_team[1]));
5086 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005087#endif
5088
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005089#if OMPT_SUPPORT
5090 __ompt_team_assign_id(team, ompt_parallel_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005091#endif
5092
Jim Cownie5e8470a2013-09-27 10:38:44 +00005093 KMP_MB();
5094
Jim Cownie5e8470a2013-09-27 10:38:44 +00005095 return team;
Jonathan Peyton30419822017-05-12 18:01:32 +00005096 }
5097
5098 /* next, let's try to take one from the team pool */
5099 KMP_MB();
5100 for (team = (kmp_team_t *)__kmp_team_pool; (team);) {
5101 /* TODO: consider resizing undersized teams instead of reaping them, now
5102 that we have a resizing mechanism */
5103 if (team->t.t_max_nproc >= max_nproc) {
5104 /* take this team from the team pool */
5105 __kmp_team_pool = team->t.t_next_pool;
5106
5107 /* setup the team for fresh use */
5108 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5109
5110 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5111 "task_team[1] %p to NULL\n",
5112 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5113 team->t.t_task_team[0] = NULL;
5114 team->t.t_task_team[1] = NULL;
5115
5116 /* reallocate space for arguments if necessary */
5117 __kmp_alloc_argv_entries(argc, team, TRUE);
5118 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5119
5120 KA_TRACE(
5121 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5122 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5123 { // Initialize barrier data.
5124 int b;
5125 for (b = 0; b < bs_last_barrier; ++b) {
5126 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5127#if USE_DEBUGGER
5128 team->t.t_bar[b].b_master_arrived = 0;
5129 team->t.t_bar[b].b_team_arrived = 0;
5130#endif
5131 }
5132 }
5133
5134#if OMP_40_ENABLED
5135 team->t.t_proc_bind = new_proc_bind;
5136#endif
5137
5138 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5139 team->t.t_id));
5140
5141#if OMPT_SUPPORT
5142 __ompt_team_assign_id(team, ompt_parallel_id);
5143#endif
5144
5145 KMP_MB();
5146
5147 return team;
5148 }
5149
5150/* reap team if it is too small, then loop back and check the next one */
5151// not sure if this is wise, but it will be redone during the hot-teams rewrite.
5152/* TODO: Use a technique to find the right-size hot team; don't reap them */
5153 team = __kmp_reap_team(team);
5154 __kmp_team_pool = team;
5155 }
5156
5157 /* nothing available in the pool, no matter, make a new team! */
5158 KMP_MB();
5159 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5160
5161 /* and set it up */
5162 team->t.t_max_nproc = max_nproc;
5163 /* NOTE well, for some reason allocating one big buffer and dividing it up
5164 seems to really hurt performance a lot on the P4, so let's not use this */
5165 __kmp_allocate_team_arrays(team, max_nproc);
5166
5167 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5168 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5169
5170 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5171 "%p to NULL\n",
5172 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5173 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5174 // memory, no need to duplicate
5175 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5176 // memory, no need to duplicate
5177
5178 if (__kmp_storage_map) {
5179 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5180 }
5181
5182 /* allocate space for arguments */
5183 __kmp_alloc_argv_entries(argc, team, FALSE);
5184 team->t.t_argc = argc;
5185
5186 KA_TRACE(20,
5187 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5188 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5189 { // Initialize barrier data.
5190 int b;
5191 for (b = 0; b < bs_last_barrier; ++b) {
5192 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5193#if USE_DEBUGGER
5194 team->t.t_bar[b].b_master_arrived = 0;
5195 team->t.t_bar[b].b_team_arrived = 0;
5196#endif
5197 }
5198 }
5199
5200#if OMP_40_ENABLED
5201 team->t.t_proc_bind = new_proc_bind;
5202#endif
5203
5204#if OMPT_SUPPORT
5205 __ompt_team_assign_id(team, ompt_parallel_id);
5206 team->t.ompt_serialized_team_info = NULL;
5207#endif
5208
5209 KMP_MB();
5210
5211 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5212 team->t.t_id));
5213
5214 return team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005215}
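// Illustrative sketch (ours, under "#if 0", never compiled): the allocation
// above cascades through three sources -- reuse the hot team (resizing it if
// new_nproc changed), scavenge the team pool for a team whose t_max_nproc is
// large enough, and finally heap-allocate a fresh team. A stripped-down
// version of that control flow, minus all reinitialization details (note the
// real pool scan reaps undersized teams instead of merely skipping them):
#if 0
struct sketch_team {
  int max_nproc;
  sketch_team *next;
};

static sketch_team *sketch_pool = nullptr; // stand-in for __kmp_team_pool

static sketch_team *sketch_allocate(sketch_team *hot, int max_nproc) {
  if (hot) // 1) hot team available: resize in place and reuse it
    return hot;
  for (sketch_team **p = &sketch_pool; *p; p = &(*p)->next) {
    if ((*p)->max_nproc >= max_nproc) { // 2) pooled team that is big enough
      sketch_team *t = *p;
      *p = t->next; // unlink it from the pool
      return t;
    }
  }
  return new sketch_team{max_nproc, nullptr}; // 3) make a brand-new team
}
#endif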
5216
5217/* TODO implement hot-teams at all levels */
5218/* TODO implement lazy thread release on demand (disband request) */
5219
5220/* free the team. return it to the team pool. release all the threads
5221 * associated with it */
Jonathan Peyton30419822017-05-12 18:01:32 +00005222void __kmp_free_team(kmp_root_t *root,
5223 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5224 int f;
5225 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5226 team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005227
Jonathan Peyton30419822017-05-12 18:01:32 +00005228 /* verify state */
5229 KMP_DEBUG_ASSERT(root);
5230 KMP_DEBUG_ASSERT(team);
5231 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5232 KMP_DEBUG_ASSERT(team->t.t_threads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005233
Jonathan Peyton30419822017-05-12 18:01:32 +00005234 int use_hot_team = team == root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005235#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005236 int level;
5237 kmp_hot_team_ptr_t *hot_teams;
5238 if (master) {
5239 level = team->t.t_active_level - 1;
5240 if (master->th.th_teams_microtask) { // in teams construct?
5241 if (master->th.th_teams_size.nteams > 1) {
5242 ++level; // level was not increased in teams construct for
5243 // team_of_masters
5244 }
5245 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5246 master->th.th_teams_level == team->t.t_level) {
5247 ++level; // level was not increased in teams construct for
5248 // team_of_workers before the parallel
5249 } // team->t.t_level will be increased inside parallel
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005250 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005251 hot_teams = master->th.th_hot_teams;
5252 if (level < __kmp_hot_teams_max_level) {
5253 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5254 use_hot_team = 1;
5255 }
5256 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005257#endif // KMP_NESTED_HOT_TEAMS
5258
Jonathan Peyton30419822017-05-12 18:01:32 +00005259 /* team is done working */
5260 TCW_SYNC_PTR(team->t.t_pkfn,
5261 NULL); // Important for Debugging Support Library.
5262 team->t.t_copyin_counter = 0; // init counter for possible reuse
5263 // Do not reset pointer to parent team to NULL for hot teams.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005264
Jonathan Peyton30419822017-05-12 18:01:32 +00005265 /* if we are non-hot team, release our threads */
5266 if (!use_hot_team) {
5267 if (__kmp_tasking_mode != tskm_immediate_exec) {
5268 // Wait for threads to reach reapable state
5269 for (f = 1; f < team->t.t_nproc; ++f) {
5270 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5271 kmp_info_t *th = team->t.t_threads[f];
5272 volatile kmp_uint32 *state = &th->th.th_reap_state;
5273 while (*state != KMP_SAFE_TO_REAP) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005274#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005275 // On Windows a thread can be killed at any time, check this
5276 DWORD ecode;
5277 if (!__kmp_is_thread_alive(th, &ecode)) {
5278 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5279 break;
5280 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005281#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005282 // first check if thread is sleeping
5283 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5284 if (fl.is_sleeping())
5285 fl.resume(__kmp_gtid_from_thread(th));
5286 KMP_CPU_PAUSE();
5287 }
5288 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005289
Jonathan Peyton30419822017-05-12 18:01:32 +00005290 // Delete task teams
5291 int tt_idx;
5292 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5293 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5294 if (task_team != NULL) {
5295 for (f = 0; f < team->t.t_nproc;
5296 ++f) { // Have all threads unref task teams
5297 team->t.t_threads[f]->th.th_task_team = NULL;
5298 }
5299 KA_TRACE(
5300 20,
5301 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5302 __kmp_get_gtid(), task_team, team->t.t_id));
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005303#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005304 __kmp_free_task_team(master, task_team);
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005305#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005306 team->t.t_task_team[tt_idx] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005307 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005308 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005309 }
5310
Jonathan Peyton30419822017-05-12 18:01:32 +00005311 // Reset pointer to parent team only for non-hot teams.
5312 team->t.t_parent = NULL;
5313 team->t.t_level = 0;
5314 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005315
Jonathan Peyton30419822017-05-12 18:01:32 +00005316 /* free the worker threads */
5317 for (f = 1; f < team->t.t_nproc; ++f) {
5318 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5319 __kmp_free_thread(team->t.t_threads[f]);
5320 team->t.t_threads[f] = NULL;
5321 }
5322
5323 /* put the team back in the team pool */
5324 /* TODO limit size of team pool, call reap_team if pool too large */
5325 team->t.t_next_pool = (kmp_team_t *)__kmp_team_pool;
5326 __kmp_team_pool = (volatile kmp_team_t *)team;
5327 }
5328
5329 KMP_MB();
5330}
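// Illustrative sketch (ours, under "#if 0", never compiled): the "wait until
// reapable" idiom used above -- poll a per-thread state flag, first kicking a
// sleeping worker awake, since a sleeping thread can never reach the reapable
// state on its own. The real loop also special-cases killed threads on
// Windows.
#if 0
#include <atomic>
#include <thread>

struct sketch_worker {
  std::atomic<int> reap_state{0}; // 1 == safe to reap
  std::atomic<bool> sleeping{false};
  void resume() { sleeping.store(false); } // stand-in for kmp_flag_64::resume
};

static void sketch_wait_reapable(sketch_worker &th) {
  while (th.reap_state.load(std::memory_order_acquire) != 1) {
    if (th.sleeping.load())
      th.resume();
    std::this_thread::yield(); // rough analog of KMP_CPU_PAUSE()
  }
}
#endif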
Jim Cownie5e8470a2013-09-27 10:38:44 +00005331
5332/* reap the team. destroy it, reclaim all its resources and free its memory */
Jonathan Peyton30419822017-05-12 18:01:32 +00005333kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5334 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005335
Jonathan Peyton30419822017-05-12 18:01:32 +00005336 KMP_DEBUG_ASSERT(team);
5337 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5338 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5339 KMP_DEBUG_ASSERT(team->t.t_threads);
5340 KMP_DEBUG_ASSERT(team->t.t_argv);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005341
Jonathan Peyton30419822017-05-12 18:01:32 +00005342 /* TODO clean the threads that are a part of this? */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005343
Jonathan Peyton30419822017-05-12 18:01:32 +00005344 /* free stuff */
5345 __kmp_free_team_arrays(team);
5346 if (team->t.t_argv != &team->t.t_inline_argv[0])
5347 __kmp_free((void *)team->t.t_argv);
5348 __kmp_free(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005349
Jonathan Peyton30419822017-05-12 18:01:32 +00005350 KMP_MB();
5351 return next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005352}
5353
Jim Cownie5e8470a2013-09-27 10:38:44 +00005354// Free the thread. Don't reap it, just place it on the pool of available
5355// threads.
5356//
5357// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5358// binding for the affinity mechanism to be useful.
5359//
5360// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5361// However, we want to avoid a potential performance problem by always
5362// scanning through the list to find the correct point at which to insert
5363// the thread (potential N**2 behavior). To do this we keep track of the
5364// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5365// With single-level parallelism, threads will always be added to the tail
5366// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5367// parallelism, all bets are off and we may need to scan through the entire
5368// free list.
5369//
5370// This change also has a potentially large performance benefit, for some
5371// applications. Previously, as threads were freed from the hot team, they
5372// would be placed back on the free list in inverse order. If the hot team
5373// grew back to its original size, then the freed thread would be placed
5374// back on the hot team in reverse order. This could cause bad cache
5375// locality problems on programs where the size of the hot team regularly
5376// grew and shrunk.
5377//
5378// Now, for single-level parallelism, the OMP tid is always == gtid.
Jonathan Peyton30419822017-05-12 18:01:32 +00005379void __kmp_free_thread(kmp_info_t *this_th) {
5380 int gtid;
5381 kmp_info_t **scan;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005382
Jonathan Peyton30419822017-05-12 18:01:32 +00005383 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5384 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005385
Jonathan Peyton30419822017-05-12 18:01:32 +00005386 KMP_DEBUG_ASSERT(this_th);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005387
Jonathan Peyton30419822017-05-12 18:01:32 +00005388 // When moving a thread to the pool, switch it to wait on its own b_go flag
5389 // and mark its team as uninitialized (NULL).
5390 int b;
5391 kmp_balign_t *balign = this_th->th.th_bar;
5392 for (b = 0; b < bs_last_barrier; ++b) {
5393 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5394 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5395 balign[b].bb.team = NULL;
5396 balign[b].bb.leaf_kids = 0;
5397 }
5398 this_th->th.th_task_state = 0;
5399
5400 /* put thread back on the free pool */
5401 TCW_PTR(this_th->th.th_team, NULL);
5402 TCW_PTR(this_th->th.th_root, NULL);
5403 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5404
5405 // If the __kmp_thread_pool_insert_pt is already past the new insert
5406 // point, then we need to re-scan the entire list.
5407 gtid = this_th->th.th_info.ds.ds_gtid;
5408 if (__kmp_thread_pool_insert_pt != NULL) {
5409 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5410 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5411 __kmp_thread_pool_insert_pt = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005412 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005413 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005414
Jonathan Peyton30419822017-05-12 18:01:32 +00005415 // Scan down the list to find the place to insert the thread.
5416 // scan is the address of a link in the list, possibly the address of
5417 // __kmp_thread_pool itself.
5418 //
5419 // In the absence of nested parallelism, the for loop will have 0 iterations.
5420 if (__kmp_thread_pool_insert_pt != NULL) {
5421 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5422 } else {
5423 scan = (kmp_info_t **)&__kmp_thread_pool;
5424 }
5425 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5426 scan = &((*scan)->th.th_next_pool))
5427 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005428
Jonathan Peyton30419822017-05-12 18:01:32 +00005429 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5430 // to its address.
5431 TCW_PTR(this_th->th.th_next_pool, *scan);
5432 __kmp_thread_pool_insert_pt = *scan = this_th;
5433 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5434 (this_th->th.th_info.ds.ds_gtid <
5435 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5436 TCW_4(this_th->th.th_in_pool, TRUE);
5437 __kmp_thread_pool_nth++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005438
Jonathan Peyton30419822017-05-12 18:01:32 +00005439 TCW_4(__kmp_nth, __kmp_nth - 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005440
5441#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005442 /* Adjust blocktime back to user setting or default if necessary */
5443 /* Middle initialization might never have occurred */
5444 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5445 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5446 if (__kmp_nth <= __kmp_avail_proc) {
5447 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005448 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005449 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005450#endif /* KMP_ADJUST_BLOCKTIME */
5451
Jonathan Peyton30419822017-05-12 18:01:32 +00005452 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005453}
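// Illustrative sketch (ours, under "#if 0", never compiled): the gtid-sorted
// free list with a cached insertion point described above, on a plain singly
// linked list. With single-level parallelism every insert lands right after
// the cached point (O(1)); nested parallelism can invalidate the cache and
// force a rescan from the head, exactly as in __kmp_free_thread().
#if 0
struct sketch_thr {
  int gtid;
  sketch_thr *next;
};

static sketch_thr *sketch_list = nullptr; // stand-in for __kmp_thread_pool
static sketch_thr *sketch_insert_pt = nullptr; // cached last insertion point

static void sketch_insert_sorted(sketch_thr *t) {
  // A cache that already points past the new gtid is useless: drop it.
  if (sketch_insert_pt && sketch_insert_pt->gtid > t->gtid)
    sketch_insert_pt = nullptr;
  sketch_thr **scan =
      sketch_insert_pt ? &sketch_insert_pt->next : &sketch_list;
  while (*scan && (*scan)->gtid < t->gtid)
    scan = &(*scan)->next;
  t->next = *scan; // splice in, keeping the list sorted by gtid
  *scan = t;
  sketch_insert_pt = t; // remember where we inserted for next time
}
#endif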
5454
Jim Cownie5e8470a2013-09-27 10:38:44 +00005455/* ------------------------------------------------------------------------ */
5456
Jonathan Peyton30419822017-05-12 18:01:32 +00005457void *__kmp_launch_thread(kmp_info_t *this_thr) {
5458 int gtid = this_thr->th.th_info.ds.ds_gtid;
5459 /* void *stack_data;*/
5460 kmp_team_t *(*volatile pteam);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005461
Jonathan Peyton30419822017-05-12 18:01:32 +00005462 KMP_MB();
5463 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005464
Jonathan Peyton30419822017-05-12 18:01:32 +00005465 if (__kmp_env_consistency_check) {
5466 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5467 }
5468
5469#if OMPT_SUPPORT
5470 if (ompt_enabled) {
5471 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5472 this_thr->th.ompt_thread_info.wait_id = 0;
5473 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5474 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5475 __ompt_thread_begin(ompt_thread_worker, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005476 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005477 }
5478#endif
5479
5480 /* This is the place where threads wait for work */
5481 while (!TCR_4(__kmp_global.g.g_done)) {
5482 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5483 KMP_MB();
5484
5485 /* wait for work to do */
5486 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005487
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005488#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005489 if (ompt_enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005490 this_thr->th.ompt_thread_info.state = ompt_state_idle;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005491 }
5492#endif
5493
Jonathan Peyton30419822017-05-12 18:01:32 +00005494 /* No tid yet since not part of a team */
5495 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5496
5497#if OMPT_SUPPORT
5498 if (ompt_enabled) {
5499 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5500 }
5501#endif
5502
5503 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5504
5505 /* have we been allocated? */
5506 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5507#if OMPT_SUPPORT
5508 ompt_task_info_t *task_info;
5509 ompt_parallel_id_t my_parallel_id;
5510 if (ompt_enabled) {
5511 task_info = __ompt_get_taskinfo(0);
5512 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
5513 }
5514#endif
5515 /* we were just woken up, so run our new task */
5516 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5517 int rc;
5518 KA_TRACE(20,
5519 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5520 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5521 (*pteam)->t.t_pkfn));
5522
5523 updateHWFPControl(*pteam);
5524
5525#if OMPT_SUPPORT
5526 if (ompt_enabled) {
5527 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5528 // Initialize OMPT task id for implicit task.
5529 int tid = __kmp_tid_from_gtid(gtid);
5530 task_info->task_id = __ompt_task_id_new(tid);
5531 }
5532#endif
5533
5534 {
5535 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5536 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5537 rc = (*pteam)->t.t_invoke(gtid);
5538 }
5539 KMP_ASSERT(rc);
5540
5541#if OMPT_SUPPORT
5542 if (ompt_enabled) {
5543 /* no frame set while outside task */
5544 task_info->frame.exit_runtime_frame = NULL;
5545
5546 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5547 }
5548#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005549 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00005550 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5551 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5552 (*pteam)->t.t_pkfn));
5553 }
5554 /* join barrier after parallel region */
5555 __kmp_join_barrier(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005556#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00005557 if (ompt_enabled) {
5558 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
5559 // don't access *pteam here: it may have already been freed
5560 // by the master thread behind the barrier (possible race)
5561 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5562 my_parallel_id, task_info->task_id);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005563 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005564 task_info->frame.exit_runtime_frame = NULL;
5565 task_info->task_id = 0;
5566 }
5567#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005568 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005569 }
5570 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005571
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005572#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00005573 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5574 __ompt_thread_end(ompt_thread_worker, gtid);
5575 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005576#endif
5577
Jonathan Peyton30419822017-05-12 18:01:32 +00005578 this_thr->th.th_task_team = NULL;
5579 /* run the destructors for the threadprivate data for this thread */
5580 __kmp_common_destroy_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005581
Jonathan Peyton30419822017-05-12 18:01:32 +00005582 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5583 KMP_MB();
5584 return this_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005585}
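// Illustrative sketch (ours, under "#if 0", never compiled): the worker life
// cycle above reduced to its skeleton with portable primitives -- park until
// work is posted (the fork barrier), run the posted task (the microtask
// invocation), and go around again until global shutdown. The real code uses
// the runtime's own barriers rather than a mutex/condvar pair.
#if 0
#include <atomic>
#include <condition_variable>
#include <functional>
#include <mutex>

struct sketch_worker_loop {
  std::mutex m;
  std::condition_variable cv;
  std::function<void()> task; // stand-in for (*pteam)->t.t_pkfn
  std::atomic<bool> done{false}; // stand-in for __kmp_global.g.g_done

  void run() {
    while (!done.load()) {
      std::unique_lock<std::mutex> lk(m);
      cv.wait(lk, [&] { return task || done.load(); }); // "fork barrier"
      if (task) {
        std::function<void()> t = std::move(task);
        task = nullptr;
        lk.unlock();
        t(); // invoke the microtask
        // a real join barrier would synchronize the whole team here
      }
    }
  }
};
#endif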
5586
5587/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005588
Jonathan Peyton30419822017-05-12 18:01:32 +00005589void __kmp_internal_end_dest(void *specific_gtid) {
5590#if KMP_COMPILER_ICC
5591#pragma warning(push)
5592#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5593// significant bits
5594#endif
5595 // Make sure no significant bits are lost
5596 int gtid = (kmp_intptr_t)specific_gtid - 1;
5597#if KMP_COMPILER_ICC
5598#pragma warning(pop)
5599#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005600
Jonathan Peyton30419822017-05-12 18:01:32 +00005601 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5602 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
5603 * this is because 0 is reserved for the nothing-stored case */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005604
Jonathan Peyton30419822017-05-12 18:01:32 +00005605 /* josh: One reason for setting the gtid specific data even when it is being
5606 destroyed by pthread is to allow gtid lookup through thread specific data
5607 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5608 that gets executed in the call to __kmp_internal_end_thread, actually
5609 gets the gtid through the thread specific data. Setting it here seems
5610 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5611 to run smoothly.
5612 todo: get rid of this after we remove the dependence on
5613 __kmp_gtid_get_specific */
5614 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5615 __kmp_gtid_set_specific(gtid);
5616#ifdef KMP_TDATA_GTID
5617 __kmp_gtid = gtid;
5618#endif
5619 __kmp_internal_end_thread(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005620}
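// Illustrative sketch (ours, under "#if 0", never compiled): the gtid+1
// encoding noted above. A thread-specific void* slot reads as NULL before
// anything is stored, so the runtime biases the gtid by one to keep gtid 0
// distinguishable from "nothing stored".
#if 0
#include <pthread.h>
#include <stdint.h>

static pthread_key_t sketch_key; // assume pthread_key_create() already ran

static void sketch_gtid_set_specific(int gtid) {
  pthread_setspecific(sketch_key, (void *)(intptr_t)(gtid + 1));
}

static int sketch_gtid_get_specific(void) {
  void *v = pthread_getspecific(sketch_key);
  return v ? (int)(intptr_t)v - 1 : -1; // -1: no gtid stored for this thread
}
#endif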
5621
Jonathan Peyton99016992015-05-26 17:32:53 +00005622#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005623
Jonathan Peyton30419822017-05-12 18:01:32 +00005624// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases
5625// destructors work perfectly, but in real libomp.so I have no evidence it is
5626// ever called. However, the -fini linker option in makefile.mk works fine.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005627
Jonathan Peyton30419822017-05-12 18:01:32 +00005628__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5629 __kmp_internal_end_atexit();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005630}
5631
Jonathan Peyton30419822017-05-12 18:01:32 +00005632void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005633
5634#endif
5635
Jonathan Peyton30419822017-05-12 18:01:32 +00005636/* [Windows] josh: when the atexit handler is called, there may still be more
5637 than one thread alive */
5638void __kmp_internal_end_atexit(void) {
5639 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5640 /* [Windows]
5641 josh: ideally, we want to completely shutdown the library in this atexit
5642 handler, but stat code that depends on thread specific data for gtid fails
5643 because that data becomes unavailable at some point during the shutdown, so
5644 we call __kmp_internal_end_thread instead. We should eventually remove the
5645 dependency on __kmp_get_specific_gtid in the stat code and use
5646 __kmp_internal_end_library to cleanly shutdown the library.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005647
Jonathan Peyton30419822017-05-12 18:01:32 +00005648 // TODO: Can some of this comment about GVS be removed?
5649 I suspect that the offending stat code is executed when the calling thread
5650 tries to clean up a dead root thread's data structures, resulting in GVS
5651 code trying to close the GVS structures for that thread, but since the stat
5652 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
5653 the calling thread is cleaning up itself instead of another thread, it gets
5654 confused. This happens because allowing a thread to unregister and clean up
5655 another thread is a recent modification for addressing an issue.
5656 Based on the current design (20050722), a thread may end up
5657 trying to unregister another thread only if thread death does not trigger
5658 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
5659 thread specific data destructor function to detect thread death. For
5660 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
5661 is nothing. Thus, the workaround is applicable only for Windows static
5662 stat library. */
5663 __kmp_internal_end_library(-1);
5664#if KMP_OS_WINDOWS
5665 __kmp_close_console();
5666#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005667}
5668
Jonathan Peyton30419822017-05-12 18:01:32 +00005669static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5670 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005671
Jonathan Peyton30419822017-05-12 18:01:32 +00005672 int gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005673
Jonathan Peyton30419822017-05-12 18:01:32 +00005674 KMP_DEBUG_ASSERT(thread != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005675
Jonathan Peyton30419822017-05-12 18:01:32 +00005676 gtid = thread->th.th_info.ds.ds_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005677
Jonathan Peyton30419822017-05-12 18:01:32 +00005678 if (!is_root) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005679
Jonathan Peyton30419822017-05-12 18:01:32 +00005680 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5681 /* Assume the threads are at the fork barrier here */
5682 KA_TRACE(
5683 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5684 gtid));
5685 /* Need release fence here to prevent seg faults for tree forkjoin barrier
5686 * (GEH) */
5687 ANNOTATE_HAPPENS_BEFORE(thread);
5688 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5689 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005690 }; // if
5691
Jonathan Peyton30419822017-05-12 18:01:32 +00005692 // Terminate OS thread.
5693 __kmp_reap_worker(thread);
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005694
Jonathan Peyton30419822017-05-12 18:01:32 +00005695 // The thread was killed asynchronously. If it was actively
5696 // spinning in the thread pool, decrement the global count.
5697 //
5698 // There is a small timing hole here - if the worker thread was just waking
5699 // up after sleeping in the pool, had reset its th_active_in_pool flag but
5700 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
5701 // the global counter might not get updated.
5702 //
5703 // Currently, this can only happen as the library is unloaded,
5704 // so there are no harmful side effects.
5705 if (thread->th.th_active_in_pool) {
5706 thread->th.th_active_in_pool = FALSE;
5707 KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
5708 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
5709 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005710
Jonathan Peyton30419822017-05-12 18:01:32 +00005711 // Decrement # of [worker] threads in the pool.
5712 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5713 --__kmp_thread_pool_nth;
5714 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00005715
Jonathan Peyton30419822017-05-12 18:01:32 +00005716 __kmp_free_implicit_task(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005717
Jonathan Peyton30419822017-05-12 18:01:32 +00005718// Free the fast memory for tasking
5719#if USE_FAST_MEMORY
5720 __kmp_free_fast_memory(thread);
5721#endif /* USE_FAST_MEMORY */
5722
5723 __kmp_suspend_uninitialize_thread(thread);
5724
5725 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5726 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5727
5728 --__kmp_all_nth;
5729// __kmp_nth was decremented when thread is added to the pool.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005730
5731#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005732 /* Adjust blocktime back to user setting or default if necessary */
5733 /* Middle initialization might never have occurred */
5734 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5735 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5736 if (__kmp_nth <= __kmp_avail_proc) {
5737 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005738 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005739 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005740#endif /* KMP_ADJUST_BLOCKTIME */
5741
Jonathan Peyton30419822017-05-12 18:01:32 +00005742 /* free the memory being used */
5743 if (__kmp_env_consistency_check) {
5744 if (thread->th.th_cons) {
5745 __kmp_free_cons_stack(thread->th.th_cons);
5746 thread->th.th_cons = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005747 }; // if
Jonathan Peyton30419822017-05-12 18:01:32 +00005748 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005749
Jonathan Peyton30419822017-05-12 18:01:32 +00005750 if (thread->th.th_pri_common != NULL) {
5751 __kmp_free(thread->th.th_pri_common);
5752 thread->th.th_pri_common = NULL;
5753 }; // if
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005754
Jonathan Peyton30419822017-05-12 18:01:32 +00005755 if (thread->th.th_task_state_memo_stack != NULL) {
5756 __kmp_free(thread->th.th_task_state_memo_stack);
5757 thread->th.th_task_state_memo_stack = NULL;
5758 }
5759
5760#if KMP_USE_BGET
5761 if (thread->th.th_local.bget_data != NULL) {
5762 __kmp_finalize_bget(thread);
5763 }; // if
5764#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005765
Alp Toker98758b02014-03-02 04:12:06 +00005766#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005767 if (thread->th.th_affin_mask != NULL) {
5768 KMP_CPU_FREE(thread->th.th_affin_mask);
5769 thread->th.th_affin_mask = NULL;
5770 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005771#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005772
Jonathan Peyton30419822017-05-12 18:01:32 +00005773 __kmp_reap_team(thread->th.th_serial_team);
5774 thread->th.th_serial_team = NULL;
5775 __kmp_free(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005776
Jonathan Peyton30419822017-05-12 18:01:32 +00005777 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005778
5779} // __kmp_reap_thread
5780
Jonathan Peyton30419822017-05-12 18:01:32 +00005781static void __kmp_internal_end(void) {
5782 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005783
Jonathan Peyton30419822017-05-12 18:01:32 +00005784 /* First, unregister the library */
5785 __kmp_unregister_library();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005786
Jonathan Peyton30419822017-05-12 18:01:32 +00005787#if KMP_OS_WINDOWS
5788 /* In Win static library, we can't tell when a root actually dies, so we
5789 reclaim the data structures for any root threads that have died but not
5790 unregistered themselves, in order to shut down cleanly.
5791 In Win dynamic library we also can't tell when a thread dies. */
5792 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
5793// dead roots
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005794#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005795
Jonathan Peyton30419822017-05-12 18:01:32 +00005796 for (i = 0; i < __kmp_threads_capacity; i++)
5797 if (__kmp_root[i])
5798 if (__kmp_root[i]->r.r_active)
5799 break;
5800 KMP_MB(); /* Flush all pending memory write invalidates. */
5801 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5802
5803 if (i < __kmp_threads_capacity) {
5804#if KMP_USE_MONITOR
5805 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5806 KMP_MB(); /* Flush all pending memory write invalidates. */
5807
5808// Need to check that monitor was initialized before reaping it. If we are
5809// called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
5810// __kmp_monitor will appear to contain valid data, but it is only valid in the
5811// parent process, not the child.
5812 // New behavior (201008): instead of keying off of the flag
5813 // __kmp_init_parallel, the monitor thread creation is keyed off
5814 // of the new flag __kmp_init_monitor.
5815 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5816 if (TCR_4(__kmp_init_monitor)) {
5817 __kmp_reap_monitor(&__kmp_monitor);
5818 TCW_4(__kmp_init_monitor, 0);
5819 }
5820 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5821 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5822#endif // KMP_USE_MONITOR
5823 } else {
5824/* TODO move this to cleanup code */
5825#ifdef KMP_DEBUG
5826 /* make sure that everything has properly ended */
5827 for (i = 0; i < __kmp_threads_capacity; i++) {
5828 if (__kmp_root[i]) {
5829 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
5830 // there can be uber threads alive here
5831 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
5832 }
5833 }
5834#endif
5835
5836 KMP_MB();
5837
5838 // Reap the worker threads.
5839 // This is valid for now, but be careful if threads are reaped sooner.
5840 while (__kmp_thread_pool != NULL) { // Loop through all the threads in the pool.
5841 // Get the next thread from the pool.
5842 kmp_info_t *thread = (kmp_info_t *)__kmp_thread_pool;
5843 __kmp_thread_pool = thread->th.th_next_pool;
5844 // Reap it.
5845 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5846 thread->th.th_next_pool = NULL;
5847 thread->th.th_in_pool = FALSE;
5848 __kmp_reap_thread(thread, 0);
5849 }; // while
5850 __kmp_thread_pool_insert_pt = NULL;
5851
5852 // Reap teams.
5853 while (__kmp_team_pool != NULL) { // Loop through all the teams in the pool.
5854 // Get the next team from the pool.
5855 kmp_team_t *team = (kmp_team_t *)__kmp_team_pool;
5856 __kmp_team_pool = team->t.t_next_pool;
5857 // Reap it.
5858 team->t.t_next_pool = NULL;
5859 __kmp_reap_team(team);
5860 }; // while
5861
5862 __kmp_reap_task_teams();
5863
5864 for (i = 0; i < __kmp_threads_capacity; ++i) {
5865 // TBD: Add some checking...
5866 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5867 }
5868
5869 /* Make sure all threadprivate destructors get run by joining with all
5870 worker threads before resetting this flag */
5871 TCW_SYNC_4(__kmp_init_common, FALSE);
5872
5873 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
5874 KMP_MB();
5875
5876#if KMP_USE_MONITOR
5877 // See note above: One of the possible fixes for CQ138434 / CQ140126
5878 //
5879 // FIXME: push both code fragments down and CSE them?
5880 // push them into __kmp_cleanup() ?
5881 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5882 if (TCR_4(__kmp_init_monitor)) {
5883 __kmp_reap_monitor(&__kmp_monitor);
5884 TCW_4(__kmp_init_monitor, 0);
5885 }
5886 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5887 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5888#endif
5889 } /* else !__kmp_global.t_active */
5890 TCW_4(__kmp_init_gtid, FALSE);
5891 KMP_MB(); /* Flush all pending memory write invalidates. */
5892
5893 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005894#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00005895 ompt_fini();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005896#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005897}
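// The reaping loops in __kmp_internal_end() above drain intrusive singly
// linked pools: each pooled element carries the link to the next element
// (th_next_pool / t_next_pool), so draining is pop-from-head until empty.
// A minimal sketch of that pattern, assuming a hypothetical node type
// (the example_* names are illustrative, not part of the runtime):
#if 0 // illustrative sketch only
struct example_node {
  struct example_node *next_pool; // intrusive link, like th.th_next_pool
};

static void example_drain_pool(struct example_node **pool_head) {
  while (*pool_head != NULL) {
    struct example_node *node = *pool_head; // pop the head element
    *pool_head = node->next_pool; // advance the head
    node->next_pool = NULL; // detach before releasing
    // release the element here, as __kmp_reap_thread()/__kmp_reap_team() do
  }
}
#endif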
5898
Jonathan Peyton30419822017-05-12 18:01:32 +00005899void __kmp_internal_end_library(int gtid_req) {
5900 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5901 /* this shouldn't be a race condition because __kmp_internal_end() is the
5902 only place to clear __kmp_serial_init */
5903 /* we'll check this later too, after we get the lock */
5904 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
5905  // redundant, because the next check will work in any case.
5906 if (__kmp_global.g.g_abort) {
5907 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
5908 /* TODO abort? */
5909 return;
5910 }
5911 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
5912 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
5913 return;
5914 }
5915
5916 KMP_MB(); /* Flush all pending memory write invalidates. */
5917
5918 /* find out who we are and what we should do */
5919 {
5920 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
5921 KA_TRACE(
5922 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
5923 if (gtid == KMP_GTID_SHUTDOWN) {
5924 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
5925 "already shutdown\n"));
5926 return;
5927 } else if (gtid == KMP_GTID_MONITOR) {
5928 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
5929 "registered, or system shutdown\n"));
5930 return;
5931 } else if (gtid == KMP_GTID_DNE) {
5932 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
5933 "shutdown\n"));
5934      /* we don't know who we are, but we may still shut down the library */
5935 } else if (KMP_UBER_GTID(gtid)) {
5936 /* unregister ourselves as an uber thread. gtid is no longer valid */
5937 if (__kmp_root[gtid]->r.r_active) {
5938 __kmp_global.g.g_abort = -1;
5939 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5940 KA_TRACE(10,
5941 ("__kmp_internal_end_library: root still active, abort T#%d\n",
5942 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005943 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00005944 } else {
5945 KA_TRACE(
5946 10,
5947 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
5948 __kmp_unregister_root_current_thread(gtid);
5949 }
5950 } else {
5951/* worker threads may call this function through the atexit handler, if they
5952 * call exit() */
5953/* For now, skip the usual subsequent processing and just dump the debug buffer.
5954 TODO: do a thorough shutdown instead */
5955#ifdef DUMP_DEBUG_ON_EXIT
5956 if (__kmp_debug_buf)
5957 __kmp_dump_debug_buffer();
5958#endif
5959 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005960 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005961 }
5962 /* synchronize the termination process */
5963 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005964
Jonathan Peyton30419822017-05-12 18:01:32 +00005965 /* have we already finished */
5966 if (__kmp_global.g.g_abort) {
5967 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
5968 /* TODO abort? */
5969 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
5970 return;
5971 }
5972 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
5973 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
5974 return;
5975 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005976
Jonathan Peyton30419822017-05-12 18:01:32 +00005977 /* We need this lock to enforce mutex between this reading of
5978 __kmp_threads_capacity and the writing by __kmp_register_root.
5979 Alternatively, we can use a counter of roots that is atomically updated by
5980 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
5981 __kmp_internal_end_*. */
5982 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005983
Jonathan Peyton30419822017-05-12 18:01:32 +00005984 /* now we can safely conduct the actual termination */
5985 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005986
Jonathan Peyton30419822017-05-12 18:01:32 +00005987 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
5988 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005989
Jonathan Peyton30419822017-05-12 18:01:32 +00005990 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005991
Jonathan Peyton30419822017-05-12 18:01:32 +00005992#ifdef DUMP_DEBUG_ON_EXIT
5993 if (__kmp_debug_buf)
5994 __kmp_dump_debug_buffer();
5995#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005996
Jonathan Peyton30419822017-05-12 18:01:32 +00005997#if KMP_OS_WINDOWS
5998 __kmp_close_console();
5999#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006000
Jonathan Peyton30419822017-05-12 18:01:32 +00006001 __kmp_fini_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006002
6003} // __kmp_internal_end_library
6004
Jonathan Peyton30419822017-05-12 18:01:32 +00006005void __kmp_internal_end_thread(int gtid_req) {
6006 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006007
Jonathan Peyton30419822017-05-12 18:01:32 +00006008 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6009 /* this shouldn't be a race condition because __kmp_internal_end() is the
6010 * only place to clear __kmp_serial_init */
6011 /* we'll check this later too, after we get the lock */
6012 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6013 // redundant, because the next check will work in any case.
6014 if (__kmp_global.g.g_abort) {
6015 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6016 /* TODO abort? */
6017 return;
6018 }
6019 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6020 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6021 return;
6022 }
6023
6024 KMP_MB(); /* Flush all pending memory write invalidates. */
6025
6026 /* find out who we are and what we should do */
6027 {
6028 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6029 KA_TRACE(10,
6030 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6031 if (gtid == KMP_GTID_SHUTDOWN) {
6032 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6033 "already shutdown\n"));
6034 return;
6035 } else if (gtid == KMP_GTID_MONITOR) {
6036 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6037 "registered, or system shutdown\n"));
6038 return;
6039 } else if (gtid == KMP_GTID_DNE) {
6040 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6041 "shutdown\n"));
6042 return;
6043 /* we don't know who we are */
6044 } else if (KMP_UBER_GTID(gtid)) {
6045 /* unregister ourselves as an uber thread. gtid is no longer valid */
6046 if (__kmp_root[gtid]->r.r_active) {
6047 __kmp_global.g.g_abort = -1;
6048 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6049 KA_TRACE(10,
6050 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6051 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006052 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006053 } else {
6054 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6055 gtid));
6056 __kmp_unregister_root_current_thread(gtid);
6057 }
6058 } else {
6059 /* just a worker thread, let's leave */
6060 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6061
6062 if (gtid >= 0) {
6063 __kmp_threads[gtid]->th.th_task_team = NULL;
6064 }
6065
6066 KA_TRACE(10,
6067 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6068 gtid));
6069 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006070 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006071 }
6072#if defined KMP_DYNAMIC_LIB
6073  // AC: let's not shut down the Linux* OS dynamic library at the exit of an
6074  // uber thread; it is better to shut down later, in the library destructor.
6075  // This change addresses a performance problem seen when a non-OpenMP thread
6076  // in a loop forks and joins many OpenMP threads; we can save a lot of time
6077  // keeping the worker threads alive until program shutdown.
6078 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
6079 // and Windows(DPD200287443) that occurs when using critical sections from
6080 // foreign threads.
6081 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6082 return;
6083#endif
6084 /* synchronize the termination process */
6085 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006086
Jonathan Peyton30419822017-05-12 18:01:32 +00006087 /* have we already finished */
6088 if (__kmp_global.g.g_abort) {
6089 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6090 /* TODO abort? */
6091 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6092 return;
6093 }
6094 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6095 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6096 return;
6097 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006098
Jonathan Peyton30419822017-05-12 18:01:32 +00006099 /* We need this lock to enforce mutex between this reading of
6100 __kmp_threads_capacity and the writing by __kmp_register_root.
6101 Alternatively, we can use a counter of roots that is atomically updated by
6102 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6103 __kmp_internal_end_*. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006104
Jonathan Peyton30419822017-05-12 18:01:32 +00006105 /* should we finish the run-time? are all siblings done? */
6106 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006107
Jonathan Peyton30419822017-05-12 18:01:32 +00006108 for (i = 0; i < __kmp_threads_capacity; ++i) {
6109 if (KMP_UBER_GTID(i)) {
6110 KA_TRACE(
6111 10,
6112 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6113 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6114 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6115 return;
6116    }
6117 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006118
Jonathan Peyton30419822017-05-12 18:01:32 +00006119 /* now we can safely conduct the actual termination */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006120
Jonathan Peyton30419822017-05-12 18:01:32 +00006121 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006122
Jonathan Peyton30419822017-05-12 18:01:32 +00006123 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6124 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006125
Jonathan Peyton30419822017-05-12 18:01:32 +00006126 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006127
Jonathan Peyton30419822017-05-12 18:01:32 +00006128#ifdef DUMP_DEBUG_ON_EXIT
6129 if (__kmp_debug_buf)
6130 __kmp_dump_debug_buffer();
6131#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006132} // __kmp_internal_end_thread
6133
Jonathan Peyton30419822017-05-12 18:01:32 +00006134// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006135// Library registration stuff.
6136
Jonathan Peyton30419822017-05-12 18:01:32 +00006137static long __kmp_registration_flag = 0;
6138// Random value used to indicate library initialization.
6139static char *__kmp_registration_str = NULL;
6140// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006141
Jonathan Peyton30419822017-05-12 18:01:32 +00006142static inline char *__kmp_reg_status_name() {
6143 /* On RHEL 3u5 if linked statically, getpid() returns different values in
6144 each thread. If registration and unregistration go in different threads
6145 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6146     env var cannot be found, because the name will contain a different pid. */
6147 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00006148} // __kmp_reg_status_name
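// A sketch of the name __kmp_reg_status_name() produces, using plain
// snprintf()/getpid() instead of __kmp_str_format() and the getpid wrapper;
// for pid 12345 the result is "__KMP_REGISTERED_LIB_12345":
#if 0 // illustrative sketch only
#include <stdio.h>
#include <unistd.h>

static void example_reg_status_name(char *buf, size_t size) {
  snprintf(buf, size, "__KMP_REGISTERED_LIB_%d", (int)getpid());
}
#endif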
6149
Jonathan Peyton30419822017-05-12 18:01:32 +00006150void __kmp_register_library_startup(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006151
Jonathan Peyton30419822017-05-12 18:01:32 +00006152 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6153 int done = 0;
6154 union {
6155 double dtime;
6156 long ltime;
6157 } time;
6158#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6159 __kmp_initialize_system_tick();
6160#endif
6161 __kmp_read_system_time(&time.dtime);
6162 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6163 __kmp_registration_str =
6164 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6165 __kmp_registration_flag, KMP_LIBRARY_FILE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006166
Jonathan Peyton30419822017-05-12 18:01:32 +00006167 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6168 __kmp_registration_str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006169
Jonathan Peyton30419822017-05-12 18:01:32 +00006170 while (!done) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006171
Jonathan Peyton30419822017-05-12 18:01:32 +00006172 char *value = NULL; // Actual value of the environment variable.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006173
Jonathan Peyton30419822017-05-12 18:01:32 +00006174    // Set the environment variable, but do not overwrite it if it already exists.
6175 __kmp_env_set(name, __kmp_registration_str, 0);
6176    // Check that the variable was actually written.
6177 value = __kmp_env_get(name);
6178 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006179
Jonathan Peyton30419822017-05-12 18:01:32 +00006180 done = 1; // Ok, environment variable set successfully, exit the loop.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006181
Jonathan Peyton30419822017-05-12 18:01:32 +00006182 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006183
Jonathan Peyton30419822017-05-12 18:01:32 +00006184 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6185      // Check whether it is alive or dead.
6186 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6187 char *tail = value;
6188 char *flag_addr_str = NULL;
6189 char *flag_val_str = NULL;
6190 char const *file_name = NULL;
6191 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6192 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6193 file_name = tail;
6194 if (tail != NULL) {
6195 long *flag_addr = 0;
6196 long flag_val = 0;
6197 KMP_SSCANF(flag_addr_str, "%p", &flag_addr);
6198 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6199 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6200 // First, check whether environment-encoded address is mapped into
6201 // addr space.
6202 // If so, dereference it to see if it still has the right value.
6203 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6204 neighbor = 1;
6205 } else {
6206 // If not, then we know the other copy of the library is no longer
6207 // running.
6208 neighbor = 2;
6209          } // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00006210        } // if
Jonathan Peyton30419822017-05-12 18:01:32 +00006211      } // if
6212 switch (neighbor) {
6213 case 0: // Cannot parse environment variable -- neighbor status unknown.
6214      // Assume it is an incompatible format from a future version of the
6215      // library, and assume the other library is alive.
6216 // WARN( ... ); // TODO: Issue a warning.
6217 file_name = "unknown library";
6218    // Attention! Falling through to the next case. That's intentional.
6219 case 1: { // Neighbor is alive.
6220 // Check it is allowed.
6221 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6222 if (!__kmp_str_match_true(duplicate_ok)) {
6223 // That's not allowed. Issue fatal error.
6224 __kmp_msg(kmp_ms_fatal,
6225 KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6226 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6227      } // if
6228 KMP_INTERNAL_FREE(duplicate_ok);
6229 __kmp_duplicate_library_ok = 1;
6230 done = 1; // Exit the loop.
6231 } break;
6232 case 2: { // Neighbor is dead.
6233 // Clear the variable and try to register library again.
6234 __kmp_env_unset(name);
6235 } break;
6236 default: { KMP_DEBUG_ASSERT(0); } break;
6237    } // switch
Jim Cownie5e8470a2013-09-27 10:38:44 +00006238
Jonathan Peyton30419822017-05-12 18:01:32 +00006239    } // if
6240    KMP_INTERNAL_FREE((void *)value);
6241
6242  } // while
6243 KMP_INTERNAL_FREE((void *)name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006244
6245} // func __kmp_register_library_startup
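// The value stored in the variable above packs three '-'-separated fields:
// the address of __kmp_registration_flag, its randomized value, and the
// library file name ("%p-%lx-%s"). A stale registration is recognized by
// checking that the encoded address is still mapped and still holds the
// encoded value. A minimal sketch of the encode side under those
// assumptions (the example_* names are hypothetical):
#if 0 // illustrative sketch only
#include <stdio.h>

static long example_flag; // stands in for __kmp_registration_flag

static void example_encode(char *buf, size_t size, const char *lib_file) {
  // Produces e.g. "0x7f0012345678-cafe0042-libomp.so".
  snprintf(buf, size, "%p-%lx-%s", (void *)&example_flag, example_flag,
           lib_file);
}
#endif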
6246
Jonathan Peyton30419822017-05-12 18:01:32 +00006247void __kmp_unregister_library(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006248
Jonathan Peyton30419822017-05-12 18:01:32 +00006249 char *name = __kmp_reg_status_name();
6250 char *value = __kmp_env_get(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006251
Jonathan Peyton30419822017-05-12 18:01:32 +00006252 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6253 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6254 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6255 // Ok, this is our variable. Delete it.
6256 __kmp_env_unset(name);
6257  } // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00006258
Jonathan Peyton30419822017-05-12 18:01:32 +00006259 KMP_INTERNAL_FREE(__kmp_registration_str);
6260 KMP_INTERNAL_FREE(value);
6261 KMP_INTERNAL_FREE(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006262
Jonathan Peyton30419822017-05-12 18:01:32 +00006263 __kmp_registration_flag = 0;
6264 __kmp_registration_str = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006265
6266} // __kmp_unregister_library
6267
Jim Cownie5e8470a2013-09-27 10:38:44 +00006268// End of Library registration stuff.
Jonathan Peyton30419822017-05-12 18:01:32 +00006269// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006270
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006271#if KMP_MIC_SUPPORTED
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006272
Jonathan Peyton30419822017-05-12 18:01:32 +00006273static void __kmp_check_mic_type() {
6274 kmp_cpuid_t cpuid_state = {0};
6275 kmp_cpuid_t *cs_p = &cpuid_state;
6276 __kmp_x86_cpuid(1, 0, cs_p);
6277 // We don't support mic1 at the moment
6278 if ((cs_p->eax & 0xff0) == 0xB10) {
6279 __kmp_mic_type = mic2;
6280 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6281 __kmp_mic_type = mic3;
6282 } else {
6283 __kmp_mic_type = non_mic;
6284 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006285}
6286
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006287#endif /* KMP_MIC_SUPPORTED */
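// CPUID leaf 1 reports the processor signature in EAX: stepping in bits
// [3:0], model in [7:4], family in [11:8], and extended model in [19:16].
// The masks above therefore select family 0x0B, model 1 (KNC) and family 6,
// model 0x57 (KNL). A sketch of decoding those fields from an EAX value
// already obtained via __kmp_x86_cpuid() (extended family omitted here):
#if 0 // illustrative sketch only
static void example_decode_signature(unsigned eax, unsigned *family,
                                     unsigned *model, unsigned *stepping) {
  *stepping = eax & 0xF;
  *model = ((eax >> 4) & 0xF) | ((eax >> 12) & 0xF0); // model | ext. model
  *family = (eax >> 8) & 0xF;
}
#endif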
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006288
Jonathan Peyton30419822017-05-12 18:01:32 +00006289static void __kmp_do_serial_initialize(void) {
6290 int i, gtid;
6291 int size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006292
Jonathan Peyton30419822017-05-12 18:01:32 +00006293 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006294
Jonathan Peyton30419822017-05-12 18:01:32 +00006295 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6296 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6297 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6298 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6299 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006300
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006301#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006302 ompt_pre_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006303#endif
6304
Jonathan Peyton30419822017-05-12 18:01:32 +00006305 __kmp_validate_locks();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006306
Jonathan Peyton30419822017-05-12 18:01:32 +00006307 /* Initialize internal memory allocator */
6308 __kmp_init_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006309
Jonathan Peyton30419822017-05-12 18:01:32 +00006310 /* Register the library startup via an environment variable and check to see
6311 whether another copy of the library is already registered. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006312
Jonathan Peyton30419822017-05-12 18:01:32 +00006313 __kmp_register_library_startup();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006314
Jonathan Peyton30419822017-05-12 18:01:32 +00006315 /* TODO reinitialization of library */
6316 if (TCR_4(__kmp_global.g.g_done)) {
6317 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6318 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006319
Jonathan Peyton30419822017-05-12 18:01:32 +00006320 __kmp_global.g.g_abort = 0;
6321 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006322
Jonathan Peyton30419822017-05-12 18:01:32 +00006323/* initialize the locks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006324#if KMP_USE_ADAPTIVE_LOCKS
6325#if KMP_DEBUG_ADAPTIVE_LOCKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006326 __kmp_init_speculative_stats();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006327#endif
6328#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006329#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006330 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006331#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006332 __kmp_init_lock(&__kmp_global_lock);
6333 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6334 __kmp_init_lock(&__kmp_debug_lock);
6335 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6336 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6337 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6338 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6339 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6340 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6341 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6342 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6343 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6344 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6345 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6346 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6347 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6348 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6349 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006350#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006351 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006352#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006353 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006354
Jonathan Peyton30419822017-05-12 18:01:32 +00006355 /* conduct initialization and initial setup of configuration */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006356
Jonathan Peyton30419822017-05-12 18:01:32 +00006357 __kmp_runtime_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006358
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006359#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006360 __kmp_check_mic_type();
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006361#endif
6362
Jonathan Peyton30419822017-05-12 18:01:32 +00006363// Some global variable initialization moved here from kmp_env_initialize()
Jim Cownie5e8470a2013-09-27 10:38:44 +00006364#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006365 kmp_diag = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006366#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006367 __kmp_abort_delay = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006368
Jonathan Peyton30419822017-05-12 18:01:32 +00006369 // From __kmp_init_dflt_team_nth()
6370 /* assume the entire machine will be used */
6371 __kmp_dflt_team_nth_ub = __kmp_xproc;
6372 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6373 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6374 }
6375 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6376 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6377 }
6378 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006379
Jonathan Peyton30419822017-05-12 18:01:32 +00006380 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
6381 // part
6382 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006383#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006384 __kmp_monitor_wakeups =
6385 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6386 __kmp_bt_intervals =
6387 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006388#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006389 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6390 __kmp_library = library_throughput;
6391 // From KMP_SCHEDULE initialization
6392 __kmp_static = kmp_sch_static_balanced;
6393// AC: do not use the analytical variant here, because it is non-monotonic
6394//__kmp_guided = kmp_sch_guided_iterative_chunked;
6395//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
6396// need to repeat assignment
6397// Barrier initialization. Moved here from the barrier branch-bit control and
6398// barrier method control parts of __kmp_env_initialize().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006399#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton30419822017-05-12 18:01:32 +00006400#define kmp_reduction_barrier_gather_bb ((int)1)
6401#define kmp_reduction_barrier_release_bb ((int)1)
6402#define kmp_reduction_barrier_gather_pat bp_hyper_bar
6403#define kmp_reduction_barrier_release_pat bp_hyper_bar
6404#endif // KMP_FAST_REDUCTION_BARRIER
6405 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6406 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6407 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6408 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6409 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6410#if KMP_FAST_REDUCTION_BARRIER
6411 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
6412 // lin_64 ): hyper,1
6413 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6414 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6415 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6416 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006417 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006418#endif // KMP_FAST_REDUCTION_BARRIER
6419 }
6420#if KMP_FAST_REDUCTION_BARRIER
6421#undef kmp_reduction_barrier_release_pat
6422#undef kmp_reduction_barrier_gather_pat
6423#undef kmp_reduction_barrier_release_bb
6424#undef kmp_reduction_barrier_gather_bb
6425#endif // KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006426#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006427 if (__kmp_mic_type == mic2) { // KNC
6428 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
6429 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
6430 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6431 1; // forkjoin release
6432 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6433 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6434 }
6435#if KMP_FAST_REDUCTION_BARRIER
6436 if (__kmp_mic_type == mic2) { // KNC
6437 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6438 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6439 }
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006440#endif // KMP_FAST_REDUCTION_BARRIER
6441#endif // KMP_MIC_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006442
Jonathan Peyton30419822017-05-12 18:01:32 +00006443// From KMP_CHECKS initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00006444#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006445 __kmp_env_checks = TRUE; /* development versions have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006446#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006447 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006448#endif
6449
Jonathan Peyton30419822017-05-12 18:01:32 +00006450 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6451 __kmp_foreign_tp = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006452
Jonathan Peyton30419822017-05-12 18:01:32 +00006453 __kmp_global.g.g_dynamic = FALSE;
6454 __kmp_global.g.g_dynamic_mode = dynamic_default;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006455
Jonathan Peyton30419822017-05-12 18:01:32 +00006456 __kmp_env_initialize(NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006457
Jonathan Peyton30419822017-05-12 18:01:32 +00006458// Print all messages in message catalog for testing purposes.
6459#ifdef KMP_DEBUG
6460 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6461 if (__kmp_str_match_true(val)) {
6462 kmp_str_buf_t buffer;
6463 __kmp_str_buf_init(&buffer);
6464 __kmp_i18n_dump_catalog(&buffer);
6465 __kmp_printf("%s", buffer.str);
6466 __kmp_str_buf_free(&buffer);
6467  } // if
6468 __kmp_env_free(&val);
6469#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006470
Jonathan Peyton30419822017-05-12 18:01:32 +00006471 __kmp_threads_capacity =
6472 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6473 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6474 __kmp_tp_capacity = __kmp_default_tp_capacity(
6475 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006476
Jonathan Peyton30419822017-05-12 18:01:32 +00006477 // If the library is shut down properly, both pools must be NULL. Just in
6478 // case, set them to NULL -- some memory may leak, but subsequent code will
6479 // work even if pools are not freed.
6480 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6481 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6482 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6483 __kmp_thread_pool = NULL;
6484 __kmp_thread_pool_insert_pt = NULL;
6485 __kmp_team_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006486
Jonathan Peyton30419822017-05-12 18:01:32 +00006487 /* Allocate all of the variable sized records */
6488 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
6489 * expandable */
6490 /* Since allocation is cache-aligned, just add extra padding at the end */
6491 size =
6492 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6493 CACHE_LINE;
6494 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6495 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6496 sizeof(kmp_info_t *) * __kmp_threads_capacity);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006497
Jonathan Peyton30419822017-05-12 18:01:32 +00006498 /* init thread counts */
6499 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6500 0); // Asserts fail if the library is reinitializing and
6501 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
6502 __kmp_all_nth = 0;
6503 __kmp_nth = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006504
Jonathan Peyton30419822017-05-12 18:01:32 +00006505 /* setup the uber master thread and hierarchy */
6506 gtid = __kmp_register_root(TRUE);
6507 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6508 KMP_ASSERT(KMP_UBER_GTID(gtid));
6509 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006510
Jonathan Peyton30419822017-05-12 18:01:32 +00006511 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006512
Jonathan Peyton30419822017-05-12 18:01:32 +00006513 __kmp_common_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006514
Jonathan Peyton30419822017-05-12 18:01:32 +00006515#if KMP_OS_UNIX
6516 /* invoke the child fork handler */
6517 __kmp_register_atfork();
6518#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006519
Jonathan Peyton30419822017-05-12 18:01:32 +00006520#if !defined KMP_DYNAMIC_LIB
6521 {
6522    /* Invoke the exit handler when the program finishes, only for the static
6523       library. For the dynamic library, we already have _fini and DllMain. */
6524 int rc = atexit(__kmp_internal_end_atexit);
6525 if (rc != 0) {
6526 __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6527 __kmp_msg_null);
6528    } // if
6529 }
6530#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006531
Jonathan Peyton30419822017-05-12 18:01:32 +00006532#if KMP_HANDLE_SIGNALS
6533#if KMP_OS_UNIX
6534 /* NOTE: make sure that this is called before the user installs their own
6535     signal handlers so that the user handlers are called first. This way they
6536 can return false, not call our handler, avoid terminating the library, and
6537 continue execution where they left off. */
6538 __kmp_install_signals(FALSE);
6539#endif /* KMP_OS_UNIX */
6540#if KMP_OS_WINDOWS
6541 __kmp_install_signals(TRUE);
6542#endif /* KMP_OS_WINDOWS */
6543#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006544
Jonathan Peyton30419822017-05-12 18:01:32 +00006545 /* we have finished the serial initialization */
6546 __kmp_init_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006547
Jonathan Peyton30419822017-05-12 18:01:32 +00006548 __kmp_init_serial = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006549
Jonathan Peyton30419822017-05-12 18:01:32 +00006550 if (__kmp_settings) {
6551 __kmp_env_print();
6552 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006553
6554#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006555 if (__kmp_display_env || __kmp_display_env_verbose) {
6556 __kmp_env_print_2();
6557 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006558#endif // OMP_40_ENABLED
6559
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006560#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006561 ompt_post_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006562#endif
6563
Jonathan Peyton30419822017-05-12 18:01:32 +00006564 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006565
Jonathan Peyton30419822017-05-12 18:01:32 +00006566 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006567}
6568
Jonathan Peyton30419822017-05-12 18:01:32 +00006569void __kmp_serial_initialize(void) {
6570 if (__kmp_init_serial) {
6571 return;
6572 }
6573 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6574 if (__kmp_init_serial) {
6575 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6576 return;
6577 }
6578 __kmp_do_serial_initialize();
6579 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6580}
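// __kmp_serial_initialize() (and __kmp_middle_initialize() /
// __kmp_parallel_initialize() below) all use the same double-checked
// pattern: test the init flag without the lock, take the bootstrap lock,
// re-test, do the one-time work, release. A generic sketch of that shape;
// the example_* names are placeholders, and a real implementation also
// needs the memory ordering the runtime gets from TCR_4/TCW_SYNC_4 and
// KMP_MB():
#if 0 // illustrative sketch only
static int example_initialized = 0; // stands in for __kmp_init_serial

static void example_initialize(void) {
  if (example_initialized) // fast path: no lock once initialized
    return;
  example_acquire_lock(); // cf. __kmp_acquire_bootstrap_lock()
  if (example_initialized) { // re-check: another thread may have won
    example_release_lock();
    return;
  }
  example_do_the_work(); // the one-time initialization
  example_initialized = 1;
  example_release_lock();
}
#endif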
6581
6582static void __kmp_do_middle_initialize(void) {
6583 int i, j;
6584 int prev_dflt_team_nth;
6585
6586 if (!__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006587 __kmp_do_serial_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006588 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006589
Jonathan Peyton30419822017-05-12 18:01:32 +00006590 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006591
Jonathan Peyton30419822017-05-12 18:01:32 +00006592 // Save the previous value for the __kmp_dflt_team_nth so that
6593 // we can avoid some reinitialization if it hasn't changed.
6594 prev_dflt_team_nth = __kmp_dflt_team_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006595
Alp Toker98758b02014-03-02 04:12:06 +00006596#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006597 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6598 // number of cores on the machine.
6599 __kmp_affinity_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006600
Jonathan Peyton30419822017-05-12 18:01:32 +00006601 // Run through the __kmp_threads array and set the affinity mask
6602 // for each root thread that is currently registered with the RTL.
6603 for (i = 0; i < __kmp_threads_capacity; i++) {
6604 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6605 __kmp_affinity_set_init_mask(i, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006606 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006607 }
Alp Toker98758b02014-03-02 04:12:06 +00006608#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006609
Jonathan Peyton30419822017-05-12 18:01:32 +00006610 KMP_ASSERT(__kmp_xproc > 0);
6611 if (__kmp_avail_proc == 0) {
6612 __kmp_avail_proc = __kmp_xproc;
6613 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006614
Jonathan Peyton30419822017-05-12 18:01:32 +00006615 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
6616 // correct them now
6617 j = 0;
6618 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6619 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6620 __kmp_avail_proc;
6621 j++;
6622 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006623
Jonathan Peyton30419822017-05-12 18:01:32 +00006624 if (__kmp_dflt_team_nth == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006625#ifdef KMP_DFLT_NTH_CORES
Jonathan Peyton30419822017-05-12 18:01:32 +00006626 // Default #threads = #cores
6627 __kmp_dflt_team_nth = __kmp_ncores;
6628 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6629 "__kmp_ncores (%d)\n",
6630 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006631#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006632 // Default #threads = #available OS procs
6633 __kmp_dflt_team_nth = __kmp_avail_proc;
6634 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6635 "__kmp_avail_proc(%d)\n",
6636 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006637#endif /* KMP_DFLT_NTH_CORES */
Jonathan Peyton30419822017-05-12 18:01:32 +00006638 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006639
Jonathan Peyton30419822017-05-12 18:01:32 +00006640 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6641 __kmp_dflt_team_nth = KMP_MIN_NTH;
6642 }
6643 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6644 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6645 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006646
Jonathan Peyton30419822017-05-12 18:01:32 +00006647 // There's no harm in continuing if the following check fails,
6648 // but it indicates an error in the previous logic.
6649 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006650
Jonathan Peyton30419822017-05-12 18:01:32 +00006651 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6652 // Run through the __kmp_threads array and set the num threads icv for each
6653 // root thread that is currently registered with the RTL (which has not
6654 // already explicitly set its nthreads-var with a call to
6655 // omp_set_num_threads()).
6656 for (i = 0; i < __kmp_threads_capacity; i++) {
6657 kmp_info_t *thread = __kmp_threads[i];
6658 if (thread == NULL)
6659 continue;
6660 if (thread->th.th_current_task->td_icvs.nproc != 0)
6661 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006662
Jonathan Peyton30419822017-05-12 18:01:32 +00006663 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006664 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006665 }
6666 KA_TRACE(
6667 20,
6668 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6669 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006670
6671#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00006672 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
6673 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6674 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6675 if (__kmp_nth > __kmp_avail_proc) {
6676 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006677 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006678 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006679#endif /* KMP_ADJUST_BLOCKTIME */
6680
Jonathan Peyton30419822017-05-12 18:01:32 +00006681 /* we have finished middle initialization */
6682 TCW_SYNC_4(__kmp_init_middle, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006683
Jonathan Peyton30419822017-05-12 18:01:32 +00006684 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006685}
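// The gap-filling loop in __kmp_do_middle_initialize() above only touches
// the leading empty slots of the nested nthreads list: for
// OMP_NUM_THREADS=",,2,3" on an 8-processor machine the parsed list
// {0, 0, 2, 3} becomes {8, 8, 2, 3}. A sketch of just that loop, with a
// plain array standing in for __kmp_nested_nth (the real loop also updates
// __kmp_dflt_team_nth and __kmp_dflt_team_nth_ub):
#if 0 // illustrative sketch only
static void example_fill_leading_zeros(int *nth, int used, int avail_proc) {
  int j = 0;
  while (j < used && !nth[j]) { // stops at the first explicit value
    nth[j] = avail_proc;
    j++;
  }
}
#endif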
6686
Jonathan Peyton30419822017-05-12 18:01:32 +00006687void __kmp_middle_initialize(void) {
6688 if (__kmp_init_middle) {
6689 return;
6690 }
6691 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6692 if (__kmp_init_middle) {
6693 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6694 return;
6695 }
6696 __kmp_do_middle_initialize();
6697 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6698}
6699
6700void __kmp_parallel_initialize(void) {
6701 int gtid = __kmp_entry_gtid(); // this might be a new root
6702
6703 /* synchronize parallel initialization (for sibling) */
6704 if (TCR_4(__kmp_init_parallel))
6705 return;
6706 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6707 if (TCR_4(__kmp_init_parallel)) {
6708 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6709 return;
6710 }
6711
6712 /* TODO reinitialization after we have already shut down */
6713 if (TCR_4(__kmp_global.g.g_done)) {
6714 KA_TRACE(
6715 10,
6716 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
6717 __kmp_infinite_loop();
6718 }
6719
6720 /* jc: The lock __kmp_initz_lock is already held, so calling
6721 __kmp_serial_initialize would cause a deadlock. So we call
6722 __kmp_do_serial_initialize directly. */
6723 if (!__kmp_init_middle) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006724 __kmp_do_middle_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006725 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006726
Jonathan Peyton30419822017-05-12 18:01:32 +00006727 /* begin initialization */
6728 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
6729 KMP_ASSERT(KMP_UBER_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006730
6731#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00006732 // Save the FP control regs.
6733 // Worker threads will set theirs to these values at thread startup.
6734 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6735 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6736 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006737#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6738
6739#if KMP_OS_UNIX
Jonathan Peyton30419822017-05-12 18:01:32 +00006740#if KMP_HANDLE_SIGNALS
6741 /* must be after __kmp_serial_initialize */
6742 __kmp_install_signals(TRUE);
6743#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006744#endif
6745
Jonathan Peyton30419822017-05-12 18:01:32 +00006746 __kmp_suspend_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006747
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006748#if defined(USE_LOAD_BALANCE)
Jonathan Peyton30419822017-05-12 18:01:32 +00006749 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6750 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6751 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006752#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006753 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6754 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6755 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006756#endif
6757
Jonathan Peyton30419822017-05-12 18:01:32 +00006758 if (__kmp_version) {
6759 __kmp_print_version_2();
6760 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006761
Jonathan Peyton30419822017-05-12 18:01:32 +00006762 /* we have finished parallel initialization */
6763 TCW_SYNC_4(__kmp_init_parallel, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006764
Jonathan Peyton30419822017-05-12 18:01:32 +00006765 KMP_MB();
6766 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006767
Jonathan Peyton30419822017-05-12 18:01:32 +00006768 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006769}
6770
Jim Cownie5e8470a2013-09-27 10:38:44 +00006771/* ------------------------------------------------------------------------ */
6772
Jonathan Peyton30419822017-05-12 18:01:32 +00006773void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6774 kmp_team_t *team) {
6775 kmp_disp_t *dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006776
Jonathan Peyton30419822017-05-12 18:01:32 +00006777 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006778
Jonathan Peyton30419822017-05-12 18:01:32 +00006779 /* none of the threads have encountered any constructs, yet. */
6780 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006781#if KMP_CACHE_MANAGE
Jonathan Peyton30419822017-05-12 18:01:32 +00006782 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006783#endif /* KMP_CACHE_MANAGE */
Jonathan Peyton30419822017-05-12 18:01:32 +00006784 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6785 KMP_DEBUG_ASSERT(dispatch);
6786 KMP_DEBUG_ASSERT(team->t.t_dispatch);
6787 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
6788 // this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006789
Jonathan Peyton30419822017-05-12 18:01:32 +00006790 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006791#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006792 dispatch->th_doacross_buf_idx =
6793 0; /* reset the doacross dispatch buffer counter */
Jonathan Peyton71909c52016-03-02 22:42:06 +00006794#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006795 if (__kmp_env_consistency_check)
6796 __kmp_push_parallel(gtid, team->t.t_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006797
Jonathan Peyton30419822017-05-12 18:01:32 +00006798 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006799}
6800
Jonathan Peyton30419822017-05-12 18:01:32 +00006801void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6802 kmp_team_t *team) {
6803 if (__kmp_env_consistency_check)
6804 __kmp_pop_parallel(gtid, team->t.t_ident);
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006805
Jonathan Peyton30419822017-05-12 18:01:32 +00006806 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006807}
6808
Jonathan Peyton30419822017-05-12 18:01:32 +00006809int __kmp_invoke_task_func(int gtid) {
6810 int rc;
6811 int tid = __kmp_tid_from_gtid(gtid);
6812 kmp_info_t *this_thr = __kmp_threads[gtid];
6813 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006814
Jonathan Peyton30419822017-05-12 18:01:32 +00006815 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006816#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006817 if (__itt_stack_caller_create_ptr) {
6818 __kmp_itt_stack_callee_enter(
6819 (__itt_caller)
6820 team->t.t_stack_id); // inform ittnotify about entering user's code
6821 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006822#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006823#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006824 SSC_MARK_INVOKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006825#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006826
6827#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006828 void *dummy;
6829 void **exit_runtime_p;
6830 ompt_task_id_t my_task_id;
6831 ompt_parallel_id_t my_parallel_id;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006832
Jonathan Peyton30419822017-05-12 18:01:32 +00006833 if (ompt_enabled) {
6834 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
6835 .ompt_task_info.frame.exit_runtime_frame);
6836 } else {
6837 exit_runtime_p = &dummy;
6838 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006839
6840#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00006841 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6842 my_parallel_id = team->t.ompt_team_info.parallel_id;
6843 if (ompt_enabled &&
6844 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6845 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id,
6846 my_task_id);
6847 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006848#endif
6849#endif
6850
Jonathan Peyton30419822017-05-12 18:01:32 +00006851 {
6852 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6853 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6854 rc =
6855 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6856 tid, (int)team->t.t_argc, (void **)team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006857#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006858 ,
6859 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006860#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006861 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006862#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006863 *exit_runtime_p = NULL;
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006864#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006865 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006866
Jim Cownie5e8470a2013-09-27 10:38:44 +00006867#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006868 if (__itt_stack_caller_create_ptr) {
6869 __kmp_itt_stack_callee_leave(
6870 (__itt_caller)
6871 team->t.t_stack_id); // inform ittnotify about leaving user's code
6872 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006873#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00006874 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006875
Jonathan Peyton30419822017-05-12 18:01:32 +00006876 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006877}
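// __kmp_invoke_microtask() above calls the compiler-outlined body of the
// parallel region, passing the global and bound thread ids followed by the
// t_argc pointers from t_argv. An outlined routine has roughly this shape
// (a sketch of the microtask_t convention; the names are illustrative):
#if 0 // illustrative sketch only
static void example_outlined_body(int *gtid, int *bound_tid, void *shared_a,
                                  void *shared_b) {
  // body of "#pragma omp parallel"; shared variables arrive by address
}
#endif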
6878
6879#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006880void __kmp_teams_master(int gtid) {
6881 // This routine is called by all master threads in teams construct
6882 kmp_info_t *thr = __kmp_threads[gtid];
6883 kmp_team_t *team = thr->th.th_team;
6884 ident_t *loc = team->t.t_ident;
6885 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6886 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
6887 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
6888 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
6889 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
6890// Launch the league of teams now, but do not let the workers execute yet
6891// (they hang on the fork barrier until the next parallel region)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006892#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006893 SSC_MARK_FORKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006894#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006895 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006896#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006897 (void *)thr->th.th_teams_microtask, // "unwrapped" task
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006898#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006899 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
6900 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006901#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006902 SSC_MARK_JOINING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006903#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006904
Jonathan Peyton30419822017-05-12 18:01:32 +00006905 // AC: last parameter "1" eliminates join barrier which won't work because
6906 // worker threads are in a fork barrier waiting for more parallel regions
6907 __kmp_join_call(loc, gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006908#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006909 ,
6910 fork_context_intel
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006911#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006912 ,
6913 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006914}
6915
Jonathan Peyton30419822017-05-12 18:01:32 +00006916int __kmp_invoke_teams_master(int gtid) {
6917 kmp_info_t *this_thr = __kmp_threads[gtid];
6918 kmp_team_t *team = this_thr->th.th_team;
6919#if KMP_DEBUG
6920 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
6921 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
6922 (void *)__kmp_teams_master);
6923#endif
6924 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
6925 __kmp_teams_master(gtid);
6926 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
6927 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006928}
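// The two routines above implement the league launch for the OpenMP "teams"
// construct: each team's master runs __kmp_teams_master(), which forks the
// league without a join barrier. User code that reaches this path looks
// roughly like the sketch below (in OpenMP 4.x, "teams" appears inside a
// "target" region):
#if 0 // illustrative sketch only
void example_teams_region(void) {
#pragma omp teams num_teams(4) thread_limit(8)
  {
    // Runs once per team master. An inner "#pragma omp parallel" here is
    // served by the workers reserved through th_teams_size.
  }
}
#endif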
6929#endif /* OMP_40_ENABLED */
6930
6931/* This sets the requested number of threads for the next parallel region
Jonathan Peyton30419822017-05-12 18:01:32 +00006932   encountered by this team. Since this should be enclosed in the forkjoin
6933   critical section, it should avoid race conditions with asymmetrical nested
6934   parallelism. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006935
Jonathan Peyton30419822017-05-12 18:01:32 +00006936void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
6937 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00006938
Jonathan Peyton30419822017-05-12 18:01:32 +00006939 if (num_threads > 0)
6940 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006941}
6942
6943#if OMP_40_ENABLED
6944
6945/* This sets the requested number of teams for the teams region and/or
Jonathan Peyton30419822017-05-12 18:01:32 +00006946 the number of threads for the next parallel region encountered */
6947void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
6948 int num_threads) {
6949 kmp_info_t *thr = __kmp_threads[gtid];
6950 KMP_DEBUG_ASSERT(num_teams >= 0);
6951 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006952
Jonathan Peyton30419822017-05-12 18:01:32 +00006953 if (num_teams == 0)
6954 num_teams = 1; // default number of teams is 1.
6955 if (num_teams > __kmp_max_nth) { // if too many teams requested?
6956 if (!__kmp_reserve_warn) {
6957 __kmp_reserve_warn = 1;
6958 __kmp_msg(kmp_ms_warning,
6959 KMP_MSG(CantFormThrTeam, num_teams, __kmp_max_nth),
6960 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006961 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006962 num_teams = __kmp_max_nth;
6963 }
6964 // Set number of teams (number of threads in the outer "parallel" of the
6965 // teams)
6966 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006967
Jonathan Peyton30419822017-05-12 18:01:32 +00006968 // Remember the number of threads for inner parallel regions
6969 if (num_threads == 0) {
6970 if (!TCR_4(__kmp_init_middle))
6971 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
6972 num_threads = __kmp_avail_proc / num_teams;
6973 if (num_teams * num_threads > __kmp_max_nth) {
6974 // adjust num_threads w/o warning as it is not user setting
6975 num_threads = __kmp_max_nth / num_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006976 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006977 } else {
6978 if (num_teams * num_threads > __kmp_max_nth) {
6979 int new_threads = __kmp_max_nth / num_teams;
6980 if (!__kmp_reserve_warn) { // user asked for too many threads
6981 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
6982 __kmp_msg(kmp_ms_warning,
6983 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
6984 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
6985 }
6986 num_threads = new_threads;
6987 }
6988 }
6989 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006990}
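// The sizing rules above in a worked example: with 16 available procs and
// num_teams=4, an unspecified num_threads defaults to 16 / 4 = 4 per team;
// an explicit request of 8 per team would give 4 * 8 = 32 > __kmp_max_nth
// (say 16) and is reduced to 16 / 4 = 4. A sketch of the same arithmetic:
#if 0 // illustrative sketch only
static int example_threads_per_team(int num_teams, int num_threads,
                                    int avail_proc, int max_nth) {
  if (num_threads == 0)
    num_threads = avail_proc / num_teams; // default: split available procs
  if (num_teams * num_threads > max_nth)
    num_threads = max_nth / num_teams; // respect the overall thread limit
  return num_threads;
}
#endif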
6991
Jim Cownie5e8470a2013-09-27 10:38:44 +00006992// Set the proc_bind var to use in the following parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00006993void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
6994 kmp_info_t *thr = __kmp_threads[gtid];
6995 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006996}
6997
6998#endif /* OMP_40_ENABLED */
6999
7000/* Launch the worker threads into the microtask. */
7001
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
#endif
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
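/* A rough sketch (illustrative) of how the master drives a region with the
   two entry points above:
       __kmp_internal_fork(loc, gtid, team);  // release workers (fork barrier)
       // ... master executes its own share of the microtask ...
       __kmp_internal_join(loc, gtid, team);  // wait for workers (join barrier)
   Workers, meanwhile, sit in __kmp_fork_barrier() until released. */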

/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the number of worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count master thread
  }

  // Skip the master thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}
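/* For example (illustrative): with KMP_BLOCKTIME=infinite every hot-team
   worker keeps spinning between regions, so the whole team minus the master
   is reported; with a finite blocktime only workers whose th_active flag is
   still set (still spinning, not yet asleep) are counted. */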

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this particular root (if we are at the outer par level), and the currently
  // executing thread (to become the master) are available to add to the new
  // team, but are currently contributing to the system load, and must be
  // accounted for.
  pool_active = TCR_4(__kmp_thread_pool_active_nth);
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
    // = dynamic_thread_limit, we shouldn't wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the number of active OpenMP threads that are available to add to
  // the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
} // __kmp_load_balance_nproc()
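/* Worked example (illustrative numbers): with __kmp_avail_proc == 8, a
   measured system_active of 10 and team_curr_active == 3 (two idle pool/hot
   threads plus the caller), the new team gets 8 - 10 + 3 = 1 thread, subject
   to the KMP_MIN_NTH floor; on an otherwise idle machine (system_active
   clamped up to 3) it gets 8 - 3 + 3 = 8, capped at set_nproc. */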

#endif /* USE_LOAD_BALANCE */

/* ------------------------------------------------------------------------ */

/* NOTE: this is called with the __kmp_init_lock held */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE((void *)__kmp_cpuinfo_file);
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;

  __kmp_i18n_catclose();

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}

/* ------------------------------------------------------------------------ */

int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is a no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is a no-op.
  return TRUE;
}
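/* For example (illustrative): running an application as
       KMP_IGNORE_MPPEND=false ./a.out
   makes __kmpc_end() take effect (runtime shutdown) instead of being
   ignored; any value __kmp_str_match_false() does not recognize as "false"
   leaves the default ignore behaviour in place. */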

void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
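/* Note: the r_begin update above is the classic double-checked locking
   pattern: an unlocked fast-path test, then a re-test under r_begin_lock, so
   concurrent callers set r_begin exactly once. */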

/* ------------------------------------------------------------------------ */

void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */

  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
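/* A minimal sketch (illustrative) of the user-visible path into this routine,
   via the exported kmp_set_library* wrappers:
       #include <omp.h>
       int main(void) {
         kmp_set_library_throughput(); // idle workers yield/sleep when done
         return 0;
       }
*/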

void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
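/* For example (illustrative): on Darwin a request of 0x12345 bytes is rounded
   up to the page boundary 0x13000 before being clamped to the
   [__kmp_sys_min_stksize, KMP_MAX_STKSIZE] range; user code typically gets
   here through the exported kmp_set_stacksize_s(size_t) wrapper. */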

/* set the behaviour of the runtime library */
/* TODO this can cause some odd behaviour with sibling parallelism... */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
    (void)__kmp_change_library(TRUE);
  } break;
  case library_turnaround:
    (void)__kmp_change_library(TRUE);
    break;
  case library_throughput:
    (void)__kmp_change_library(FALSE);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* ------------------------------------------------------------------------ */

void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime was explicitly set */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
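/* A minimal sketch (illustrative) of the user-visible path:
       #include <omp.h>
       int main(void) {
         kmp_set_blocktime(0); // idle workers sleep right after each region
         return 0;
       }
   Values are clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME];
   KMP_MAX_BLOCKTIME ("infinite") keeps workers spinning between regions. */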

void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings
#if OMP_40_ENABLED
      || __kmp_display_env || __kmp_display_env_verbose
#endif // OMP_40_ENABLED
      ) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
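/* For example (illustrative): the exported wrapper call
       kmp_set_defaults("KMP_BLOCKTIME=0");
   lands here and re-runs environment parsing with the given string; settings
   already consumed by earlier initialization may no longer take effect. */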

/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct (lck != NULL, as in the
  // current PAROPT).
  // If (reduce_data != NULL && reduce_func != NULL): the tree-reduction method
  // can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by the RTL.
  // Finally, it's up to the OpenMP RTL to decide which of the methods
  // generated by PAROPT to use.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another way to get the team size (with 1 dynamic dereference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \
    KMP_OS_DARWIN

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
// KMP_OS_DARWIN

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_WINDOWS

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
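/* Decision summary (illustrative) for the common x86_64 Linux case, assuming
   the compiler generated both the atomic flag and a tree-reduce function:
       team_size == 1                 -> empty_reduce_block (no sync needed)
       2 <= team_size <= 4 (8 on MIC) -> atomic_reduce_block
       larger teams                   -> TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER
   KMP_FORCE_REDUCTION can override the choice, except for serialized teams. */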

// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007623}