/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
    "5.0 (201611)";
#elif OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

#if KMP_USE_MONITOR
kmp_info_t __kmp_monitor;
#endif

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread. */
/* A fast (and somewhat portable) way to get a unique identifier for the
   executing thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
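/* Lookup strategy (selected by __kmp_gtid_mode): a value >= 3 reads the gtid
   from native thread-local storage (KMP_TDATA_GTID), >= 2 queries the
   OS-specific thread key, and anything lower falls back to the stack-address
   search implemented below. */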
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
        non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
        may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

  /* if we haven't been assigned a gtid, then return the code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
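  /* If the recorded stack window is no longer allowed to grow, an address
     outside of it indicates a genuine overflow; otherwise widen the recorded
     base/size so that future lookups hit the fast path above. */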
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}

int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}

/* ------------------------------------------------------------------------ */

void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}

#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = (char *)p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              p1 = (char *)p1 + page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}

void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;

    /* On Windows* OS, abort() by default raises a pop-up error box, which
       stalls nightly testing. Unfortunately, we cannot reliably suppress the
       pop-up error boxes. _set_abort_behavior() works well, but this function
       is not available in VS7 (this is not a problem for the DLL, but it is a
       problem for the static OpenMP RTL). SetErrorMode (and so, the timelimit
       utility) does not help, at least in some versions of the MS C RTL.

       The following sequence seems to be the only way to simulate abort() and
       avoid the pop-up error box. */
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). The OS terminates the other threads
  // (except the one calling ProcessExit or FreeLibrary), so it might be safe
  // to access __kmp_threads[] without taking the forkjoin_lock. In fact,
  // however, some threads may still be alive here, although they are about to
  // be terminated. The threads in the array with ds_thread==0 are the most
  // suspicious, so it may not actually be safe to access __kmp_threads[].

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      // lpReserved == NULL when FreeLibrary() was called,
      // lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear triggering the problem of unreleased forkjoin
      // lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like it's not a corner
      // case, but common cases:
      // - the main function was compiled by an alternative compiler;
      // - the main function was compiled by icl but without /Qopenmp
      //   (application with plugins);
      // - application terminates by calling C exit(), Fortran CALL EXIT() or
      //   Fortran STOP.
      // - alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // return previous setting of whether
  // KMP_LIBRARY=throughput
}

/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

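/* Returns nonzero if the calling thread should execute the SINGLE block
   (either the team is serialized, or this thread won the atomic
   compare-and-store on t_construct below), and zero otherwise. */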
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}

/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads
#if OMP_40_ENABLED
                                 ,
                                 int enter_teams
#endif /* OMP_40_ENABLED */
                                 ) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

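  // After the dynamic-mode adjustment above, new_nthreads is successively
  // clamped by the device-wide thread limit, the contention-group limit
  // (OMP_THREAD_LIMIT), and the remaining capacity of the __kmp_threads
  // array; each clamp may emit a one-time warning when dyn-var is false.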
  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  if (root->r.r_cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_cg_max_nth) {
    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}

/* Allocate threads from the thread pool and assign them to the new team. We are
   assured that there are enough threads available, because we checked earlier
   while holding the forkjoin critical section. */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

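/* A "hot" team keeps its worker threads attached between parallel regions so
   that a fork at the same nesting level can reuse them instead of returning
   them to the thread pool; the level computed below indexes the master's
   th_hot_teams array. */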
/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team
// structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team
    // by the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration

/* Run a parallel region that has been serialized, so it runs in a team of only
   the single master thread. */
1153void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1154 kmp_info_t *this_thr;
1155 kmp_team_t *serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001156
Jonathan Peyton30419822017-05-12 18:01:32 +00001157 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001158
Jonathan Peyton30419822017-05-12 18:01:32 +00001159 /* Skip all this code for autopar serialized loops since it results in
1160 unacceptable overhead */
1161 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1162 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001163
Jonathan Peyton30419822017-05-12 18:01:32 +00001164 if (!TCR_4(__kmp_init_parallel))
1165 __kmp_parallel_initialize();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001166
Jonathan Peyton30419822017-05-12 18:01:32 +00001167 this_thr = __kmp_threads[global_tid];
1168 serial_team = this_thr->th.th_serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001169
Jonathan Peyton30419822017-05-12 18:01:32 +00001170 /* utilize the serialized team held by this thread */
1171 KMP_DEBUG_ASSERT(serial_team);
1172 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001173
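  // A serial team owns no task team of its own, so the thread's th_task_team
  // pointer is dropped for the duration of the serialized region; it is
  // re-established when the region ends.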
Jonathan Peyton30419822017-05-12 18:01:32 +00001174 if (__kmp_tasking_mode != tskm_immediate_exec) {
1175 KMP_DEBUG_ASSERT(
1176 this_thr->th.th_task_team ==
1177 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1178 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1179 NULL);
1180 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1181 "team %p, new task_team = NULL\n",
1182 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1183 this_thr->th.th_task_team = NULL;
1184 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001185
1186#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001187 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1188 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1189 proc_bind = proc_bind_false;
1190 } else if (proc_bind == proc_bind_default) {
1191 // No proc_bind clause was specified, so use the current value
1192 // of proc-bind-var for this parallel region.
1193 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1194 }
1195 // Reset for next parallel region
1196 this_thr->th.th_set_proc_bind = proc_bind_default;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001197#endif /* OMP_40_ENABLED */
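  // The proc_bind value resolved above is only consumed below if a fresh
  // serial team has to be allocated for this thread.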
1198
Joachim Protze82e94a52017-11-01 10:08:30 +00001199#if OMPT_SUPPORT
1200 ompt_data_t ompt_parallel_data;
1201 ompt_parallel_data.ptr = NULL;
1202 ompt_data_t *implicit_task_data;
1203 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1204 if (ompt_enabled.enabled &&
1205 this_thr->th.ompt_thread_info.state != omp_state_overhead) {
1206
1207 ompt_task_info_t *parent_task_info;
1208 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1209
Joachim Protzec255ca72017-11-05 14:11:10 +00001210 parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001211 if (ompt_enabled.ompt_callback_parallel_begin) {
1212 int team_size = 1;
1213
1214 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1215 &(parent_task_info->task_data), &(parent_task_info->frame),
1216 &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
1217 }
1218 }
1219#endif // OMPT_SUPPORT
1220
Jonathan Peyton30419822017-05-12 18:01:32 +00001221 if (this_thr->th.th_team != serial_team) {
1222 // Nested level will be an index in the nested nthreads array
1223 int level = this_thr->th.th_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001224
Jonathan Peyton30419822017-05-12 18:01:32 +00001225 if (serial_team->t.t_serialized) {
1226 /* this serial team was already used
 1227 TODO increase performance by making these locks more specific */
1228 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001229
Jonathan Peyton30419822017-05-12 18:01:32 +00001230 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001231
Jonathan Peyton30419822017-05-12 18:01:32 +00001232 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001233#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001234 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001235#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001236#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001237 proc_bind,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001238#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001239 &this_thr->th.th_current_task->td_icvs,
1240 0 USE_NESTED_HOT_ARG(NULL));
1241 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1242 KMP_ASSERT(new_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001243
Jonathan Peyton30419822017-05-12 18:01:32 +00001244 /* setup new serialized team and install it */
1245 new_team->t.t_threads[0] = this_thr;
1246 new_team->t.t_parent = this_thr->th.th_team;
1247 serial_team = new_team;
1248 this_thr->th.th_serial_team = serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001249
Jonathan Peyton30419822017-05-12 18:01:32 +00001250 KF_TRACE(
1251 10,
1252 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1253 global_tid, serial_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001254
Jonathan Peyton30419822017-05-12 18:01:32 +00001255 /* TODO the above breaks the requirement that if we run out of resources,
1256 then we can still guarantee that serialized teams are ok, since we may
1257 need to allocate a new one */
1258 } else {
1259 KF_TRACE(
1260 10,
1261 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1262 global_tid, serial_team));
1263 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001264
Jonathan Peyton30419822017-05-12 18:01:32 +00001265 /* we have to initialize this serial team */
1266 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1267 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1268 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1269 serial_team->t.t_ident = loc;
1270 serial_team->t.t_serialized = 1;
1271 serial_team->t.t_nproc = 1;
1272 serial_team->t.t_parent = this_thr->th.th_team;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00001273 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00001274 this_thr->th.th_team = serial_team;
1275 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001276
Jonathan Peyton30419822017-05-12 18:01:32 +00001277 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1278 this_thr->th.th_current_task));
1279 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1280 this_thr->th.th_current_task->td_flags.executing = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001281
Jonathan Peyton30419822017-05-12 18:01:32 +00001282 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001283
Jonathan Peyton30419822017-05-12 18:01:32 +00001284 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1285 implicit task for each serialized task represented by
1286 team->t.t_serialized? */
1287 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1288 &this_thr->th.th_current_task->td_parent->td_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001289
Jonathan Peyton30419822017-05-12 18:01:32 +00001290 // Thread value exists in the nested nthreads array for the next nested
1291 // level
1292 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1293 this_thr->th.th_current_task->td_icvs.nproc =
1294 __kmp_nested_nth.nth[level + 1];
1295 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001296
1297#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001298 if (__kmp_nested_proc_bind.used &&
1299 (level + 1 < __kmp_nested_proc_bind.used)) {
1300 this_thr->th.th_current_task->td_icvs.proc_bind =
1301 __kmp_nested_proc_bind.bind_types[level + 1];
1302 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001303#endif /* OMP_40_ENABLED */
1304
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001305#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00001306 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001307#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001308 this_thr->th.th_info.ds.ds_tid = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001309
Jonathan Peyton30419822017-05-12 18:01:32 +00001310 /* set thread cache values */
1311 this_thr->th.th_team_nproc = 1;
1312 this_thr->th.th_team_master = this_thr;
1313 this_thr->th.th_team_serialized = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001314
Jonathan Peyton30419822017-05-12 18:01:32 +00001315 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1316 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001317
Jonathan Peyton30419822017-05-12 18:01:32 +00001318 propagateFPControl(serial_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001319
Jonathan Peyton30419822017-05-12 18:01:32 +00001320 /* check if we need to allocate dispatch buffers stack */
1321 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1322 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1323 serial_team->t.t_dispatch->th_disp_buffer =
1324 (dispatch_private_info_t *)__kmp_allocate(
1325 sizeof(dispatch_private_info_t));
1326 }
1327 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001328
Jonathan Peyton30419822017-05-12 18:01:32 +00001329 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001330
Jonathan Peyton30419822017-05-12 18:01:32 +00001331 } else {
1332 /* this serialized team is already being used,
1333 * that's fine, just add another nested level */
1334 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1335 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1336 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1337 ++serial_team->t.t_serialized;
1338 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001339
Jonathan Peyton30419822017-05-12 18:01:32 +00001340 // Nested level will be an index in the nested nthreads array
1341 int level = this_thr->th.th_team->t.t_level;
1342 // Thread value exists in the nested nthreads array for the next nested
1343 // level
1344 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1345 this_thr->th.th_current_task->td_icvs.nproc =
1346 __kmp_nested_nth.nth[level + 1];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001347 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001348 serial_team->t.t_level++;
1349 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1350 "of serial team %p to %d\n",
1351 global_tid, serial_team, serial_team->t.t_level));
1352
1353 /* allocate/push dispatch buffers stack */
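    // Each nested serialized level pushes its own dispatch_private_info_t,
    // linked through 'next', so the loop-dispatch state of the enclosing
    // levels is preserved until they resume.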
1354 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1355 {
1356 dispatch_private_info_t *disp_buffer =
1357 (dispatch_private_info_t *)__kmp_allocate(
1358 sizeof(dispatch_private_info_t));
1359 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1360 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1361 }
1362 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1363
1364 KMP_MB();
1365 }
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001366#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001367 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001368#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001369
Jonathan Peyton30419822017-05-12 18:01:32 +00001370 if (__kmp_env_consistency_check)
1371 __kmp_push_parallel(global_tid, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001372#if OMPT_SUPPORT
1373 serial_team->t.ompt_team_info.master_return_address = codeptr;
1374 if (ompt_enabled.enabled &&
1375 this_thr->th.ompt_thread_info.state != omp_state_overhead) {
Joachim Protzec255ca72017-11-05 14:11:10 +00001376 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001377
1378 ompt_lw_taskteam_t lw_taskteam;
1379 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1380 &ompt_parallel_data, codeptr);
1381
1382 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
 1383 // don't use lw_taskteam after linking. content was swapped
1384
1385 /* OMPT implicit task begin */
1386 implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
1387 if (ompt_enabled.ompt_callback_implicit_task) {
1388 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1389 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1390 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00001391 OMPT_CUR_TASK_INFO(this_thr)
1392 ->thread_num = __kmp_tid_from_gtid(global_tid);
Joachim Protze82e94a52017-11-01 10:08:30 +00001393 }
1394
1395 /* OMPT state */
1396 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
Joachim Protzec255ca72017-11-05 14:11:10 +00001397 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001398 }
1399#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001400}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001401
Jim Cownie5e8470a2013-09-27 10:38:44 +00001402/* most of the work for a fork */
1403/* return true if we really went parallel, false if serialized */
Jonathan Peyton30419822017-05-12 18:01:32 +00001404int __kmp_fork_call(ident_t *loc, int gtid,
1405 enum fork_context_e call_context, // Intel, GNU, ...
Joachim Protze82e94a52017-11-01 10:08:30 +00001406 kmp_int32 argc, microtask_t microtask, launch_t invoker,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001407/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001408#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001409 va_list *ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001410#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001411 va_list ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001412#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001413 ) {
1414 void **argv;
1415 int i;
1416 int master_tid;
1417 int master_this_cons;
1418 kmp_team_t *team;
1419 kmp_team_t *parent_team;
1420 kmp_info_t *master_th;
1421 kmp_root_t *root;
1422 int nthreads;
1423 int master_active;
1424 int master_set_numthreads;
1425 int level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001426#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001427 int active_level;
1428 int teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001429#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001430#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001431 kmp_hot_team_ptr_t **p_hot_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001432#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001433 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001434 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001435 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001436
Jonathan Peyton30419822017-05-12 18:01:32 +00001437 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1438 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1439 /* Some systems prefer the stack for the root thread(s) to start with */
1440 /* some gap from the parent stack to prevent false sharing. */
1441 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1442 /* These 2 lines below are so this does not get optimized out */
1443 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1444 __kmp_stkpadding += (short)((kmp_int64)dummy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001445 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001446
1447 /* initialize if needed */
Jonathan Peyton30419822017-05-12 18:01:32 +00001448 KMP_DEBUG_ASSERT(
1449 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1450 if (!TCR_4(__kmp_init_parallel))
1451 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001452
1453 /* setup current data */
Jonathan Peyton30419822017-05-12 18:01:32 +00001454 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
1455 // shutdown
1456 parent_team = master_th->th.th_team;
1457 master_tid = master_th->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001458 master_this_cons = master_th->th.th_local.this_construct;
Jonathan Peyton30419822017-05-12 18:01:32 +00001459 root = master_th->th.th_root;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001460 master_active = root->r.r_active;
1461 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001462
1463#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001464 ompt_data_t ompt_parallel_data;
1465 ompt_parallel_data.ptr = NULL;
1466 ompt_data_t *parent_task_data;
Joachim Protzec5836064b2018-05-28 08:14:58 +00001467 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001468 ompt_data_t *implicit_task_data;
1469 void *return_address = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001470
Joachim Protze82e94a52017-11-01 10:08:30 +00001471 if (ompt_enabled.enabled) {
1472 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1473 NULL, NULL);
1474 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001475 }
1476#endif
1477
Jim Cownie5e8470a2013-09-27 10:38:44 +00001478 // Nested level will be an index in the nested nthreads array
Jonathan Peyton30419822017-05-12 18:01:32 +00001479 level = parent_team->t.t_level;
1480 // used to launch non-serial teams even if nested is not allowed
1481 active_level = parent_team->t.t_active_level;
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001482#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00001483 // needed to check nesting inside the teams
1484 teams_level = master_th->th.th_teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001485#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001486#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001487 p_hot_teams = &master_th->th.th_hot_teams;
1488 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1489 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1490 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1491 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
Jonathan Peyton642688b2017-06-01 16:46:36 +00001492 // it is either actual or not needed (when active_level > 0)
1493 (*p_hot_teams)[0].hot_team_nth = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001494 }
1495#endif
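    // Note: the hot-teams array is allocated lazily, once per master thread,
    // the first time it forks with a nonzero __kmp_hot_teams_max_level.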
Jim Cownie5e8470a2013-09-27 10:38:44 +00001496
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001497#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001498 if (ompt_enabled.enabled) {
1499 if (ompt_enabled.ompt_callback_parallel_begin) {
1500 int team_size = master_set_numthreads
1501 ? master_set_numthreads
1502 : get__nproc_2(parent_team, master_tid);
1503 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1504 parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
1505 OMPT_INVOKER(call_context), return_address);
1506 }
1507 master_th->th.ompt_thread_info.state = omp_state_overhead;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001508 }
1509#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001510
Jim Cownie5e8470a2013-09-27 10:38:44 +00001511 master_th->th.th_ident = loc;
1512
1513#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001514 if (master_th->th.th_teams_microtask && ap &&
1515 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1516 // AC: This is start of parallel that is nested inside teams construct.
1517 // The team is actual (hot), all workers are ready at the fork barrier.
1518 // No lock needed to initialize the team a bit, then free workers.
1519 parent_team->t.t_ident = loc;
1520 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1521 parent_team->t.t_argc = argc;
1522 argv = (void **)parent_team->t.t_argv;
1523 for (i = argc - 1; i >= 0; --i)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001524/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001525#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001526 *argv++ = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001527#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001528 *argv++ = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001529#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001530 // Increment our nested depth level, but do not increase the serialization
1531 if (parent_team == master_th->th.th_serial_team) {
1532 // AC: we are in serialized parallel
1533 __kmpc_serialized_parallel(loc, gtid);
1534 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
 1535 // AC: need this in order for enquiry functions to work
 1536 // correctly; will restore at join time
1537 parent_team->t.t_serialized--;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001538#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001539 void *dummy;
1540 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001541
Jonathan Peyton30419822017-05-12 18:01:32 +00001542 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001543
Joachim Protze82e94a52017-11-01 10:08:30 +00001544 if (ompt_enabled.enabled) {
1545 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1546 &ompt_parallel_data, return_address);
Joachim Protzec255ca72017-11-05 14:11:10 +00001547 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001548
Joachim Protze82e94a52017-11-01 10:08:30 +00001549 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
 1550 // don't use lw_taskteam after linking. content was swapped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001551
Jonathan Peyton30419822017-05-12 18:01:32 +00001552 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001553 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1554 if (ompt_enabled.ompt_callback_implicit_task) {
1555 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1556 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1557 implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00001558 OMPT_CUR_TASK_INFO(master_th)
1559 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001560 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001561
Jonathan Peyton30419822017-05-12 18:01:32 +00001562 /* OMPT state */
Joachim Protze82e94a52017-11-01 10:08:30 +00001563 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001564 } else {
1565 exit_runtime_p = &dummy;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001566 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001567#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001568
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001569 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001570 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1571 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1572 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1573#if OMPT_SUPPORT
1574 ,
1575 exit_runtime_p
1576#endif
1577 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001578 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001579
Jonathan Peyton30419822017-05-12 18:01:32 +00001580#if OMPT_SUPPORT
1581 *exit_runtime_p = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001582 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00001583 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001584 if (ompt_enabled.ompt_callback_implicit_task) {
1585 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1586 ompt_scope_end, NULL, implicit_task_data, 1,
Joachim Protze9be9cf22018-05-07 12:42:21 +00001587 OMPT_CUR_TASK_INFO(master_th)->thread_num);
Jonathan Peyton30419822017-05-12 18:01:32 +00001588 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001589 __ompt_lw_taskteam_unlink(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001590
Joachim Protze82e94a52017-11-01 10:08:30 +00001591 if (ompt_enabled.ompt_callback_parallel_end) {
1592 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1593 OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
1594 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001595 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001596 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001597 }
1598#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001599 return TRUE;
Jonathan Peyton30419822017-05-12 18:01:32 +00001600 }
1601
1602 parent_team->t.t_pkfn = microtask;
Jonathan Peyton30419822017-05-12 18:01:32 +00001603 parent_team->t.t_invoke = invoker;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00001604 KMP_ATOMIC_INC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00001605 parent_team->t.t_active_level++;
1606 parent_team->t.t_level++;
1607
1608 /* Change number of threads in the team if requested */
 1609 if (master_set_numthreads) { // The parallel region has a num_threads clause
1610 if (master_set_numthreads < master_th->th.th_teams_size.nth) {
 1611 // AC: can only reduce the number of threads dynamically, can't increase
1612 kmp_info_t **other_threads = parent_team->t.t_threads;
1613 parent_team->t.t_nproc = master_set_numthreads;
1614 for (i = 0; i < master_set_numthreads; ++i) {
1615 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1616 }
1617 // Keep extra threads hot in the team for possible next parallels
1618 }
1619 master_th->th.th_set_nproc = 0;
1620 }
1621
1622#if USE_DEBUGGER
1623 if (__kmp_debugging) { // Let debugger override number of threads.
1624 int nth = __kmp_omp_num_threads(loc);
Jonathan Peyton642688b2017-06-01 16:46:36 +00001625 if (nth > 0) { // 0 means debugger doesn't want to change num threads
Jonathan Peyton30419822017-05-12 18:01:32 +00001626 master_set_numthreads = nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001627 }
1628 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001629#endif
1630
1631 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
1632 "master_th=%p, gtid=%d\n",
1633 root, parent_team, master_th, gtid));
1634 __kmp_internal_fork(loc, gtid, parent_team);
1635 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
1636 "master_th=%p, gtid=%d\n",
1637 root, parent_team, master_th, gtid));
1638
1639 /* Invoke microtask for MASTER thread */
1640 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
1641 parent_team->t.t_id, parent_team->t.t_pkfn));
1642
1643 {
1644 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1645 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1646 if (!parent_team->t.t_invoke(gtid)) {
1647 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
1648 }
1649 }
1650 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
1651 parent_team->t.t_id, parent_team->t.t_pkfn));
1652 KMP_MB(); /* Flush all pending memory write invalidates. */
1653
1654 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
1655
1656 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001657 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001658#endif /* OMP_40_ENABLED */
1659
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001660#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001661 if (__kmp_tasking_mode != tskm_immediate_exec) {
1662 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1663 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001664 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001665#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001666
Jonathan Peyton30419822017-05-12 18:01:32 +00001667 if (parent_team->t.t_active_level >=
1668 master_th->th.th_current_task->td_icvs.max_active_levels) {
1669 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001670 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001671#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001672 int enter_teams = ((ap == NULL && active_level == 0) ||
1673 (ap && teams_level > 0 && teams_level == level));
Andrey Churbanov92effc42015-08-18 10:08:27 +00001674#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001675 nthreads =
1676 master_set_numthreads
1677 ? master_set_numthreads
1678 : get__nproc_2(
1679 parent_team,
1680 master_tid); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001681
Jonathan Peyton30419822017-05-12 18:01:32 +00001682 // Check if we need to take forkjoin lock? (no need for serialized
1683 // parallel out of teams construct). This code moved here from
1684 // __kmp_reserve_threads() to speedup nested serialized parallels.
1685 if (nthreads > 1) {
1686 if ((!get__nested(master_th) && (root->r.r_in_parallel
Andrey Churbanov92effc42015-08-18 10:08:27 +00001687#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001688 && !enter_teams
Andrey Churbanov92effc42015-08-18 10:08:27 +00001689#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001690 )) ||
1691 (__kmp_library == library_serial)) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001692 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
1693 " threads\n",
1694 gtid, nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +00001695 nthreads = 1;
Andrey Churbanov92effc42015-08-18 10:08:27 +00001696 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001697 }
1698 if (nthreads > 1) {
1699 /* determine how many new threads we can use */
1700 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jonathan Peyton30419822017-05-12 18:01:32 +00001701 nthreads = __kmp_reserve_threads(
1702 root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001703#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001704 /* AC: If we execute teams from a parallel region (on host), then
 1705 teams should be created, but each can only have 1 thread if
 1706 nesting is disabled. If teams is called from a serial region, then
1707 teams and their threads should be created regardless of the
1708 nesting setting. */
1709 ,
1710 enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001712 );
1713 if (nthreads == 1) {
1714 // Free lock for single thread execution here; for multi-thread
1715 // execution it will be freed later after team of threads created
1716 // and initialized
1717 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Andrey Churbanov92effc42015-08-18 10:08:27 +00001718 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001719 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001720 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001721 KMP_DEBUG_ASSERT(nthreads > 0);
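    // nthreads is now final: 1 means the region is serialized below, while a
    // value greater than 1 means __kmp_forkjoin_lock is still held and will be
    // released only after the new team has been set up.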
Jim Cownie5e8470a2013-09-27 10:38:44 +00001722
Jonathan Peyton30419822017-05-12 18:01:32 +00001723 // If we temporarily changed the set number of threads then restore it now
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001724 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001725
Jim Cownie5e8470a2013-09-27 10:38:44 +00001726 /* create a serialized parallel region? */
Jonathan Peyton30419822017-05-12 18:01:32 +00001727 if (nthreads == 1) {
1728/* josh todo: hypothetical question: what do we do for OS X*? */
1729#if KMP_OS_LINUX && \
1730 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1731 void *args[argc];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001732#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001733 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1734#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1735 KMP_ARCH_AARCH64) */
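      // 'args' is scratch space on the master's stack; in the plain (non-teams)
      // serialized case below, the varargs are copied into it before the
      // microtask is invoked directly.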
Jim Cownie5e8470a2013-09-27 10:38:44 +00001736
Jonathan Peyton30419822017-05-12 18:01:32 +00001737 KA_TRACE(20,
1738 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001739
Jonathan Peyton30419822017-05-12 18:01:32 +00001740 __kmpc_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001741
Jonathan Peyton30419822017-05-12 18:01:32 +00001742 if (call_context == fork_context_intel) {
1743 /* TODO this sucks, use the compiler itself to pass args! :) */
1744 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001745#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001746 if (!ap) {
1747 // revert change made in __kmpc_serialized_parallel()
1748 master_th->th.th_serial_team->t.t_level--;
1749// Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001750
1751#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001752 void *dummy;
1753 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001754 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001755
Jonathan Peyton30419822017-05-12 18:01:32 +00001756 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001757
Joachim Protze82e94a52017-11-01 10:08:30 +00001758 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001759 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001760 &ompt_parallel_data, return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001761
Joachim Protze82e94a52017-11-01 10:08:30 +00001762 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
 1763 // don't use lw_taskteam after linking. content was swapped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001764
Joachim Protze82e94a52017-11-01 10:08:30 +00001765 task_info = OMPT_CUR_TASK_INFO(master_th);
Joachim Protzec255ca72017-11-05 14:11:10 +00001766 exit_runtime_p = &(task_info->frame.exit_frame);
Joachim Protze82e94a52017-11-01 10:08:30 +00001767 if (ompt_enabled.ompt_callback_implicit_task) {
1768 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1769 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1770 &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00001771 OMPT_CUR_TASK_INFO(master_th)
1772 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001773 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001774
Jonathan Peyton30419822017-05-12 18:01:32 +00001775 /* OMPT state */
Joachim Protze82e94a52017-11-01 10:08:30 +00001776 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001777 } else {
1778 exit_runtime_p = &dummy;
1779 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001780#endif
1781
Jonathan Peyton30419822017-05-12 18:01:32 +00001782 {
1783 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1784 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1785 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1786 parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001787#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001788 ,
1789 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001790#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001791 );
1792 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001793
1794#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001795 if (ompt_enabled.enabled) {
1796 exit_runtime_p = NULL;
1797 if (ompt_enabled.ompt_callback_implicit_task) {
1798 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1799 ompt_scope_end, NULL, &(task_info->task_data), 1,
Joachim Protze9be9cf22018-05-07 12:42:21 +00001800 OMPT_CUR_TASK_INFO(master_th)->thread_num);
Jonathan Peyton30419822017-05-12 18:01:32 +00001801 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001802
Jonathan Peyton30419822017-05-12 18:01:32 +00001803 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001804 if (ompt_enabled.ompt_callback_parallel_end) {
1805 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1806 OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
1807 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001808 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001809 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001810 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001811#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001812 } else if (microtask == (microtask_t)__kmp_teams_master) {
1813 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1814 master_th->th.th_serial_team);
1815 team = master_th->th.th_team;
1816 // team->t.t_pkfn = microtask;
1817 team->t.t_invoke = invoker;
1818 __kmp_alloc_argv_entries(argc, team, TRUE);
1819 team->t.t_argc = argc;
1820 argv = (void **)team->t.t_argv;
1821 if (ap) {
1822 for (i = argc - 1; i >= 0; --i)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001823// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001824#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001825 *argv++ = va_arg(*ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001826#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001827 *argv++ = va_arg(ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001828#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001829 } else {
1830 for (i = 0; i < argc; ++i)
1831 // Get args from parent team for teams construct
1832 argv[i] = parent_team->t.t_argv[i];
1833 }
1834 // AC: revert change made in __kmpc_serialized_parallel()
1835 // because initial code in teams should have level=0
1836 team->t.t_level--;
1837 // AC: call special invoker for outer "parallel" of teams construct
1838 {
1839 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1840 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1841 invoker(gtid);
1842 }
1843 } else {
1844#endif /* OMP_40_ENABLED */
1845 argv = args;
1846 for (i = argc - 1; i >= 0; --i)
1847// TODO: revert workaround for Intel(R) 64 tracker #96
1848#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1849 *argv++ = va_arg(*ap, void *);
1850#else
1851 *argv++ = va_arg(ap, void *);
1852#endif
1853 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001854
1855#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001856 void *dummy;
1857 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001858 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001859
Jonathan Peyton30419822017-05-12 18:01:32 +00001860 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001861
Joachim Protze82e94a52017-11-01 10:08:30 +00001862 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001863 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001864 &ompt_parallel_data, return_address);
1865 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
 1866 // don't use lw_taskteam after linking. content was swapped
1867 task_info = OMPT_CUR_TASK_INFO(master_th);
Joachim Protzec255ca72017-11-05 14:11:10 +00001868 exit_runtime_p = &(task_info->frame.exit_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001869
Jonathan Peyton30419822017-05-12 18:01:32 +00001870 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001871 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1872 if (ompt_enabled.ompt_callback_implicit_task) {
1873 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1874 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1875 implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00001876 OMPT_CUR_TASK_INFO(master_th)
1877 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001878 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001879
Jonathan Peyton30419822017-05-12 18:01:32 +00001880 /* OMPT state */
Joachim Protze82e94a52017-11-01 10:08:30 +00001881 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001882 } else {
1883 exit_runtime_p = &dummy;
1884 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001885#endif
1886
Jonathan Peyton30419822017-05-12 18:01:32 +00001887 {
1888 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1889 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1890 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001891#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001892 ,
1893 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001894#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001895 );
1896 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001897
1898#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001899 if (ompt_enabled.enabled) {
1900 *exit_runtime_p = NULL;
1901 if (ompt_enabled.ompt_callback_implicit_task) {
1902 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1903 ompt_scope_end, NULL, &(task_info->task_data), 1,
Joachim Protze9be9cf22018-05-07 12:42:21 +00001904 OMPT_CUR_TASK_INFO(master_th)->thread_num);
Jonathan Peyton30419822017-05-12 18:01:32 +00001905 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001906
Joachim Protze82e94a52017-11-01 10:08:30 +00001907 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001908 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001909 if (ompt_enabled.ompt_callback_parallel_end) {
1910 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1911 &ompt_parallel_data, parent_task_data,
1912 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001913 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001914 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001915 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001916#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001918 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001919#endif /* OMP_40_ENABLED */
1920 } else if (call_context == fork_context_gnu) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001921#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001922 ompt_lw_taskteam_t lwt;
1923 __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
1924 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001925
Joachim Protzec255ca72017-11-05 14:11:10 +00001926 lwt.ompt_task_info.frame.exit_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001927 __ompt_lw_taskteam_link(&lwt, master_th, 1);
 1928// don't use lw_taskteam after linking. content was swapped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001929#endif
1930
Jonathan Peyton30419822017-05-12 18:01:32 +00001931 // we were called from GNU native code
1932 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
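      // Per the contract at the top of this function, FALSE reports that the
      // region was serialized rather than truly forked.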
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933 return FALSE;
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001934 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001935 KMP_ASSERT2(call_context < fork_context_last,
1936 "__kmp_fork_call: unknown fork_context parameter");
1937 }
1938
1939 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
1940 KMP_MB();
1941 return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001942 }
1943
Jim Cownie5e8470a2013-09-27 10:38:44 +00001944 // GEH: only modify the executing flag in the case when not serialized
1945 // serialized case is handled in kmpc_serialized_parallel
Jonathan Peyton30419822017-05-12 18:01:32 +00001946 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
1947 "curtask=%p, curtask_max_aclevel=%d\n",
1948 parent_team->t.t_active_level, master_th,
1949 master_th->th.th_current_task,
1950 master_th->th.th_current_task->td_icvs.max_active_levels));
1951 // TODO: GEH - cannot do this assertion because root thread not set up as
1952 // executing
Jim Cownie5e8470a2013-09-27 10:38:44 +00001953 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1954 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955
1956#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001957 if (!master_th->th.th_teams_microtask || level > teams_level)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958#endif /* OMP_40_ENABLED */
1959 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001960 /* Increment our nested depth level */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00001961 KMP_ATOMIC_INC(&root->r.r_in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962 }
1963
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001966 if ((level + 1 < __kmp_nested_nth.used) &&
1967 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
1968 nthreads_icv = __kmp_nested_nth.nth[level + 1];
1969 } else {
1970 nthreads_icv = 0; // don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 }
1972
1973#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001974 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001975 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jonathan Peyton30419822017-05-12 18:01:32 +00001976 kmp_proc_bind_t proc_bind_icv =
1977 proc_bind_default; // proc_bind_default means don't update
1978 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1979 proc_bind = proc_bind_false;
1980 } else {
1981 if (proc_bind == proc_bind_default) {
1982 // No proc_bind clause specified; use current proc-bind-var for this
1983 // parallel region
1984 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1985 }
1986 /* else: The proc_bind policy was specified explicitly on parallel clause.
1987 This overrides proc-bind-var for this parallel region, but does not
1988 change proc-bind-var. */
1989 // Figure the value of proc-bind-var for the child threads.
1990 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1991 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1992 master_th->th.th_current_task->td_icvs.proc_bind)) {
1993 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1994 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995 }
1996
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001998 master_th->th.th_set_proc_bind = proc_bind_default;
1999#endif /* OMP_40_ENABLED */
2000
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002001 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002003 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002004#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002005 ) {
2006 kmp_internal_control_t new_icvs;
2007 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2008 new_icvs.next = NULL;
2009 if (nthreads_icv > 0) {
2010 new_icvs.nproc = nthreads_icv;
2011 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002012
2013#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002014 if (proc_bind_icv != proc_bind_default) {
2015 new_icvs.proc_bind = proc_bind_icv;
2016 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002017#endif /* OMP_40_ENABLED */
2018
Jonathan Peyton30419822017-05-12 18:01:32 +00002019 /* allocate a new parallel team */
2020 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2021 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002022#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002023 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002024#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002025#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002026 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002028 &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002029 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002030 /* allocate a new parallel team */
2031 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2032 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002033#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002034 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002035#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002037 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002038#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002039 &master_th->th.th_current_task->td_icvs,
2040 argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002041 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002042 KF_TRACE(
2043 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002044
2045 /* setup the new team */
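  // KMP_CHECK_UPDATE only writes when the value actually changes, so reusing a
  // hot team does not needlessly dirty its cache lines (same rationale as for
  // the FP-control fields above).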
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002046 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2047 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2048 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2049 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2050 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002051#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002052 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2053 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002054#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002055 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2056// TODO: parent_team->t.t_level == INT_MAX ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00002057#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002058 if (!master_th->th.th_teams_microtask || level > teams_level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002059#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002060 int new_level = parent_team->t.t_level + 1;
2061 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2062 new_level = parent_team->t.t_active_level + 1;
2063 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002064#if OMP_40_ENABLED
2065 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002066 // AC: Do not increase parallel level at start of the teams construct
2067 int new_level = parent_team->t.t_level;
2068 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2069 new_level = parent_team->t.t_active_level;
2070 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002071 }
2072#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002073 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002074 // set master's schedule as new run-time schedule
2075 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002076
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002077#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002078 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002079#endif
2080
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002081 // Update the floating point rounding in the team if required.
2082 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002083
Jonathan Peyton30419822017-05-12 18:01:32 +00002084 if (__kmp_tasking_mode != tskm_immediate_exec) {
 2085 // Set master's task team to team's task team. Unless this is a hot team, it
2086 // should be NULL.
Jonathan Peyton30419822017-05-12 18:01:32 +00002087 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2088 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peyton30419822017-05-12 18:01:32 +00002089 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
2090 "%p, new task_team %p / team %p\n",
2091 __kmp_gtid_from_thread(master_th),
2092 master_th->th.th_task_team, parent_team,
2093 team->t.t_task_team[master_th->th.th_task_state], team));
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002094
Jonathan Peyton30419822017-05-12 18:01:32 +00002095 if (active_level || master_th->th.th_task_team) {
2096 // Take a memo of master's task_state
2097 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2098 if (master_th->th.th_task_state_top >=
2099 master_th->th.th_task_state_stack_sz) { // increase size
2100 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2101 kmp_uint8 *old_stack, *new_stack;
2102 kmp_uint32 i;
2103 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2104 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2105 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2106 }
2107 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2108 ++i) { // zero-init rest of stack
2109 new_stack[i] = 0;
2110 }
2111 old_stack = master_th->th.th_task_state_memo_stack;
2112 master_th->th.th_task_state_memo_stack = new_stack;
2113 master_th->th.th_task_state_stack_sz = new_size;
2114 __kmp_free(old_stack);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002115 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002116 // Store master's task_state on stack
2117 master_th->th
2118 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2119 master_th->th.th_task_state;
2120 master_th->th.th_task_state_top++;
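      // Having saved the current task_state, a nested hot team resumes the
      // state recorded for this level on a previous entry; otherwise the
      // master enters the new team with task_state 0.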
2121#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton642688b2017-06-01 16:46:36 +00002122 if (team == master_th->th.th_hot_teams[active_level].hot_team) {
2123 // Restore master's nested state if nested hot team
Jonathan Peyton30419822017-05-12 18:01:32 +00002124 master_th->th.th_task_state =
2125 master_th->th
2126 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2127 } else {
2128#endif
2129 master_th->th.th_task_state = 0;
2130#if KMP_NESTED_HOT_TEAMS
2131 }
2132#endif
2133 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002134#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002135 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2136 (team == root->r.r_hot_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002137#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002138 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002139
Jonathan Peyton30419822017-05-12 18:01:32 +00002140 KA_TRACE(
2141 20,
2142 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2143 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2144 team->t.t_nproc));
2145 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2146 (team->t.t_master_tid == 0 &&
2147 (team->t.t_parent == root->r.r_root_team ||
2148 team->t.t_parent->t.t_serialized)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002149 KMP_MB();
2150
2151 /* now, setup the arguments */
Jonathan Peyton30419822017-05-12 18:01:32 +00002152 argv = (void **)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002153#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002154 if (ap) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002155#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002156 for (i = argc - 1; i >= 0; --i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002157// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002158#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00002159 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002160#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002161 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002162#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002163 KMP_CHECK_UPDATE(*argv, new_argv);
2164 argv++;
2165 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002166#if OMP_40_ENABLED
2167 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002168 for (i = 0; i < argc; ++i) {
2169 // Get args from parent team for teams construct
2170 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2171 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002172 }
2173#endif /* OMP_40_ENABLED */
2174
2175 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002176 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002177 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
Jonathan Peyton30419822017-05-12 18:01:32 +00002178 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002179
Jonathan Peyton30419822017-05-12 18:01:32 +00002180 __kmp_fork_team_threads(root, team, master_th, gtid);
2181 __kmp_setup_icv_copy(team, nthreads,
2182 &master_th->th.th_current_task->td_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002183
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002184#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002185 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002186#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002187
Jonathan Peyton30419822017-05-12 18:01:32 +00002188 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002189
Jim Cownie5e8470a2013-09-27 10:38:44 +00002190#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002191 if (team->t.t_active_level == 1 // only report frames at level 1
2192#if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002193 && !master_th->th.th_teams_microtask // not in teams construct
Jonathan Peyton30419822017-05-12 18:01:32 +00002194#endif /* OMP_40_ENABLED */
2195 ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002196#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002197 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2198 (__kmp_forkjoin_frames_mode == 3 ||
2199 __kmp_forkjoin_frames_mode == 1)) {
2200 kmp_uint64 tmp_time = 0;
2201 if (__itt_get_timestamp_ptr)
2202 tmp_time = __itt_get_timestamp();
2203 // Internal fork - report frame begin
2204 master_th->th.th_frame_time = tmp_time;
2205 if (__kmp_forkjoin_frames_mode == 3)
2206 team->t.t_region_time = tmp_time;
Jonathan Peyton642688b2017-06-01 16:46:36 +00002207 } else
2208// only one notification scheme (either "submit" or "forking/joined", not both)
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002209#endif /* USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00002210 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2211 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
Jonathan Peyton8c432f22018-01-04 22:56:47 +00002212 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
Jonathan Peyton30419822017-05-12 18:01:32 +00002213 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2214 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002215 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002216#endif /* USE_ITT_BUILD */
2217
2218 /* now go on and do the work */
Jonathan Peyton30419822017-05-12 18:01:32 +00002219 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002220 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00002221 KF_TRACE(10,
2222 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2223 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002224
2225#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002226 if (__itt_stack_caller_create_ptr) {
2227 team->t.t_stack_id =
2228 __kmp_itt_stack_caller_create(); // create new stack stitching id
2229 // before entering fork barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002230 }
2231#endif /* USE_ITT_BUILD */
2232
2233#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00002234 // AC: skip __kmp_internal_fork at teams construct, let only master
2235 // threads execute
2236 if (ap)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002237#endif /* OMP_40_ENABLED */
2238 {
Jonathan Peyton30419822017-05-12 18:01:32 +00002239 __kmp_internal_fork(loc, gtid, team);
2240 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2241 "master_th=%p, gtid=%d\n",
2242 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002243 }
2244
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002245 if (call_context == fork_context_gnu) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002246 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2247 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002248 }
2249
2250 /* Invoke microtask for MASTER thread */
Jonathan Peyton30419822017-05-12 18:01:32 +00002251 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2252 team->t.t_id, team->t.t_pkfn));
2253 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002254
Jonathan Peyton30419822017-05-12 18:01:32 +00002255 {
2256 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2257 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
2258 if (!team->t.t_invoke(gtid)) {
2259 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jim Cownie5e8470a2013-09-27 10:38:44 +00002260 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002261 }
2262 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2263 team->t.t_id, team->t.t_pkfn));
2264 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002265
Jonathan Peyton30419822017-05-12 18:01:32 +00002266 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002267
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002268#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002269 if (ompt_enabled.enabled) {
2270 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002271 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002272#endif
2273
Jonathan Peyton30419822017-05-12 18:01:32 +00002274 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002275}
2276
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002277#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002278static inline void __kmp_join_restore_state(kmp_info_t *thread,
2279 kmp_team_t *team) {
2280 // restore state outside the region
2281 thread->th.ompt_thread_info.state =
Joachim Protze82e94a52017-11-01 10:08:30 +00002282 ((team->t.t_serialized) ? omp_state_work_serial
2283 : omp_state_work_parallel);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002284}
2285
Joachim Protze82e94a52017-11-01 10:08:30 +00002286static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2287 kmp_team_t *team, ompt_data_t *parallel_data,
2288 fork_context_e fork_context, void *codeptr) {
2289 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2290 if (ompt_enabled.ompt_callback_parallel_end) {
2291 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2292 parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
2293 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002294 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002295
Joachim Protzec255ca72017-11-05 14:11:10 +00002296 task_info->frame.enter_frame = NULL;
Jonathan Peyton30419822017-05-12 18:01:32 +00002297 __kmp_join_restore_state(thread, team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002298}
2299#endif
2300
Jonathan Peyton30419822017-05-12 18:01:32 +00002301void __kmp_join_call(ident_t *loc, int gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002302#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002303 ,
2304 enum fork_context_e fork_context
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002305#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002306#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002307 ,
2308 int exit_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00002309#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002310 ) {
2311 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2312 kmp_team_t *team;
2313 kmp_team_t *parent_team;
2314 kmp_info_t *master_th;
2315 kmp_root_t *root;
2316 int master_active;
2317 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002318
Jonathan Peyton30419822017-05-12 18:01:32 +00002319 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002320
Jonathan Peyton30419822017-05-12 18:01:32 +00002321 /* setup current data */
2322 master_th = __kmp_threads[gtid];
2323 root = master_th->th.th_root;
2324 team = master_th->th.th_team;
2325 parent_team = team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002326
Jonathan Peyton30419822017-05-12 18:01:32 +00002327 master_th->th.th_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002328
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002329#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002330 if (ompt_enabled.enabled) {
2331 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002332 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002333#endif
2334
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002335#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00002336 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2337 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2338 "th_task_team = %p\n",
2339 __kmp_gtid_from_thread(master_th), team,
2340 team->t.t_task_team[master_th->th.th_task_state],
2341 master_th->th.th_task_team));
2342 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2343 team->t.t_task_team[master_th->th.th_task_state]);
2344 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002345#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002346
Jonathan Peyton30419822017-05-12 18:01:32 +00002347 if (team->t.t_serialized) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002348#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002349 if (master_th->th.th_teams_microtask) {
2350 // We are in teams construct
2351 int level = team->t.t_level;
2352 int tlevel = master_th->th.th_teams_level;
2353 if (level == tlevel) {
2354 // AC: we haven't incremented it earlier at start of teams construct,
2355 // so do it here - at the end of teams construct
2356 team->t.t_level++;
2357 } else if (level == tlevel + 1) {
2358 // AC: we are exiting parallel inside teams, need to increment
2359 // serialization in order to restore it in the next call to
2360 // __kmpc_end_serialized_parallel
2361 team->t.t_serialized++;
2362 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002363 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002364#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002365 __kmpc_end_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002366
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002367#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002368 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002369 __kmp_join_restore_state(master_th, parent_team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002370 }
2371#endif
2372
Jonathan Peyton30419822017-05-12 18:01:32 +00002373 return;
2374 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002375
Jonathan Peyton30419822017-05-12 18:01:32 +00002376 master_active = team->t.t_master_active;
2377
2378#if OMP_40_ENABLED
2379 if (!exit_teams)
2380#endif /* OMP_40_ENABLED */
2381 {
2382 // AC: No barrier for internal teams at exit from teams construct.
2383 // But there is barrier for external team (league).
2384 __kmp_internal_join(loc, gtid, team);
2385 }
2386#if OMP_40_ENABLED
2387 else {
2388 master_th->th.th_task_state =
2389 0; // AC: no tasking in teams (out of any parallel)
2390 }
2391#endif /* OMP_40_ENABLED */
2392
2393 KMP_MB();
2394
2395#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002396 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2397 void *codeptr = team->t.ompt_team_info.master_return_address;
Jonathan Peyton30419822017-05-12 18:01:32 +00002398#endif
2399
2400#if USE_ITT_BUILD
2401 if (__itt_stack_caller_create_ptr) {
2402 __kmp_itt_stack_caller_destroy(
2403 (__itt_caller)team->t
2404 .t_stack_id); // destroy the stack stitching id after join barrier
2405 }
2406
Jonathan Peyton8c432f22018-01-04 22:56:47 +00002407 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
Jonathan Peyton30419822017-05-12 18:01:32 +00002408 if (team->t.t_active_level == 1
2409#if OMP_40_ENABLED
2410 && !master_th->th.th_teams_microtask /* not in teams construct */
2411#endif /* OMP_40_ENABLED */
2412 ) {
2413 master_th->th.th_ident = loc;
2414 // only one notification scheme (either "submit" or "forking/joined", not
2415 // both)
2416 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2417 __kmp_forkjoin_frames_mode == 3)
2418 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2419 master_th->th.th_frame_time, 0, loc,
2420 master_th->th.th_team_nproc, 1);
2421 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2422 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2423 __kmp_itt_region_joined(gtid);
2424 } // active_level == 1
2425#endif /* USE_ITT_BUILD */
2426
2427#if OMP_40_ENABLED
2428 if (master_th->th.th_teams_microtask && !exit_teams &&
2429 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2430 team->t.t_level == master_th->th.th_teams_level + 1) {
2431 // AC: We need to leave the team structure intact at the end of parallel
2432 // inside the teams construct, so that at the next parallel same (hot) team
2433 // works, only adjust nesting levels
2434
2435 /* Decrement our nested depth level */
2436 team->t.t_level--;
2437 team->t.t_active_level--;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00002438 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00002439
2440 /* Restore number of threads in the team if needed */
2441 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2442 int old_num = master_th->th.th_team_nproc;
2443 int new_num = master_th->th.th_teams_size.nth;
2444 kmp_info_t **other_threads = team->t.t_threads;
2445 team->t.t_nproc = new_num;
2446 for (i = 0; i < old_num; ++i) {
2447 other_threads[i]->th.th_team_nproc = new_num;
2448 }
2449 // Adjust states of non-used threads of the team
2450 for (i = old_num; i < new_num; ++i) {
2451 // Re-initialize thread's barrier data.
2452 int b;
2453 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2454 for (b = 0; b < bs_last_barrier; ++b) {
2455 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2456 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2457#if USE_DEBUGGER
2458 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2459#endif
2460 }
2461 if (__kmp_tasking_mode != tskm_immediate_exec) {
2462 // Synchronize thread's task state
2463 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2464 }
2465 }
2466 }
2467
2468#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002469 if (ompt_enabled.enabled) {
2470 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2471 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002472 }
2473#endif
2474
2475 return;
2476 }
2477#endif /* OMP_40_ENABLED */
2478
2479 /* do cleanup and restore the parent team */
2480 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2481 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2482
2483 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2484
2485 /* jc: The following lock has instructions with REL and ACQ semantics,
2486 separating the parallel user code called in this parallel region
2487 from the serial user code called after this function returns. */
2488 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2489
2490#if OMP_40_ENABLED
2491 if (!master_th->th.th_teams_microtask ||
2492 team->t.t_level > master_th->th.th_teams_level)
2493#endif /* OMP_40_ENABLED */
2494 {
2495 /* Decrement our nested depth level */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00002496 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00002497 }
2498 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2499
Joachim Protze82e94a52017-11-01 10:08:30 +00002500#if OMPT_SUPPORT
2501 if (ompt_enabled.enabled) {
2502 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2503 if (ompt_enabled.ompt_callback_implicit_task) {
2504 int ompt_team_size = team->t.t_nproc;
2505 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2506 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
Joachim Protze9be9cf22018-05-07 12:42:21 +00002507 OMPT_CUR_TASK_INFO(master_th)->thread_num);
Jonathan Peyton30419822017-05-12 18:01:32 +00002508 }
Joachim Protze82e94a52017-11-01 10:08:30 +00002509
Joachim Protzec255ca72017-11-05 14:11:10 +00002510 task_info->frame.exit_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00002511 task_info->task_data = ompt_data_none;
Jonathan Peyton30419822017-05-12 18:01:32 +00002512 }
2513#endif
2514
2515 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2516 master_th, team));
2517 __kmp_pop_current_task_from_thread(master_th);
2518
2519#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2520 // Restore master thread's partition.
2521 master_th->th.th_first_place = team->t.t_first_place;
2522 master_th->th.th_last_place = team->t.t_last_place;
2523#endif /* OMP_40_ENABLED */
2524
2525 updateHWFPControl(team);
2526
2527 if (root->r.r_active != master_active)
2528 root->r.r_active = master_active;
2529
2530 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2531 master_th)); // this will free worker threads
2532
2533 /* This race was tricky to find. The following must stay inside the critical
2534 region; otherwise debug assertions may occasionally fail because the old team
2535 may be reallocated and the hierarchy can appear inconsistent. The race is
2536 benign and cannot cause incorrect execution, but it does trip those
2537 assertions; it is a single dereference and assignment, so keep it here. */
2538 master_th->th.th_team = parent_team;
2539 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2540 master_th->th.th_team_master = parent_team->t.t_threads[0];
2541 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2542
2543 /* restore serialized team, if need be */
2544 if (parent_team->t.t_serialized &&
2545 parent_team != master_th->th.th_serial_team &&
2546 parent_team != root->r.r_root_team) {
2547 __kmp_free_team(root,
2548 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2549 master_th->th.th_serial_team = parent_team;
2550 }
2551
2552 if (__kmp_tasking_mode != tskm_immediate_exec) {
2553 if (master_th->th.th_task_state_top >
2554 0) { // Restore task state from memo stack
2555 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2556 // Remember master's state if we re-use this nested hot team
2557 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2558 master_th->th.th_task_state;
2559 --master_th->th.th_task_state_top; // pop
2560 // Now restore state at this level
2561 master_th->th.th_task_state =
2562 master_th->th
2563 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2564 }
2565 // Copy the task team from the parent team to the master thread
2566 master_th->th.th_task_team =
2567 parent_team->t.t_task_team[master_th->th.th_task_state];
2568 KA_TRACE(20,
2569 ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2570 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2571 parent_team));
2572 }
2573
2574 // TODO: GEH - cannot do this assertion because root thread not set up as
2575 // executing
2576 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2577 master_th->th.th_current_task->td_flags.executing = 1;
2578
2579 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2580
2581#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002582 if (ompt_enabled.enabled) {
2583 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2584 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002585 }
2586#endif
2587
2588 KMP_MB();
2589 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2590}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002591
2592/* Check whether we should push an internal control record onto the
2593 serial team stack. If so, do it. */
Jonathan Peyton30419822017-05-12 18:01:32 +00002594void __kmp_save_internal_controls(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002595
Jonathan Peyton30419822017-05-12 18:01:32 +00002596 if (thread->th.th_team != thread->th.th_serial_team) {
2597 return;
2598 }
2599 if (thread->th.th_team->t.t_serialized > 1) {
2600 int push = 0;
2601
2602 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2603 push = 1;
2604 } else {
2605 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2606 thread->th.th_team->t.t_serialized) {
2607 push = 1;
2608 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002609 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002610 if (push) { /* push a record on the serial team's stack */
2611 kmp_internal_control_t *control =
2612 (kmp_internal_control_t *)__kmp_allocate(
2613 sizeof(kmp_internal_control_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002614
Jonathan Peyton30419822017-05-12 18:01:32 +00002615 copy_icvs(control, &thread->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002616
Jonathan Peyton30419822017-05-12 18:01:32 +00002617 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618
Jonathan Peyton30419822017-05-12 18:01:32 +00002619 control->next = thread->th.th_team->t.t_control_stack_top;
2620 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002622 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623}
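// Illustrative sketch (not part of the runtime): the push above only matters
// when an ICV setter runs inside a nested, serialized region, e.g. (assuming a
// num_threads(1) region is serialized by this runtime):
//
//   #pragma omp parallel num_threads(1)     // outer region, serialized
//   {
//     #pragma omp parallel num_threads(1)   // nested: t_serialized becomes > 1
//     {
//       omp_set_num_threads(4); // the setter calls __kmp_save_internal_controls(),
//                               // which pushes the current ICVs so they can be
//                               // restored when this serialized level ends
//     }
//   }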
2624
2625/* Changes set_nproc */
Jonathan Peyton30419822017-05-12 18:01:32 +00002626void __kmp_set_num_threads(int new_nth, int gtid) {
2627 kmp_info_t *thread;
2628 kmp_root_t *root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002629
Jonathan Peyton30419822017-05-12 18:01:32 +00002630 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2631 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002632
Jonathan Peyton30419822017-05-12 18:01:32 +00002633 if (new_nth < 1)
2634 new_nth = 1;
2635 else if (new_nth > __kmp_max_nth)
2636 new_nth = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637
Jonathan Peyton30419822017-05-12 18:01:32 +00002638 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2639 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002640
Jonathan Peyton30419822017-05-12 18:01:32 +00002641 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002642
Jonathan Peyton30419822017-05-12 18:01:32 +00002643 set__nproc(thread, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002644
Jonathan Peyton30419822017-05-12 18:01:32 +00002645 // If this omp_set_num_threads() call will cause the hot team size to be
2646 // reduced (in the absence of a num_threads clause), then reduce it now,
2647 // rather than waiting for the next parallel region.
2648 root = thread->th.th_root;
2649 if (__kmp_init_parallel && (!root->r.r_active) &&
2650 (root->r.r_hot_team->t.t_nproc > new_nth)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002651#if KMP_NESTED_HOT_TEAMS
2652 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2653#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002654 ) {
2655 kmp_team_t *hot_team = root->r.r_hot_team;
2656 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002657
Jonathan Peyton30419822017-05-12 18:01:32 +00002658 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002659
Jonathan Peyton30419822017-05-12 18:01:32 +00002660 // Release the extra threads we don't need any more.
2661 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2662 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2663 if (__kmp_tasking_mode != tskm_immediate_exec) {
2664 // When decreasing team size, threads no longer in the team should unref
2665 // task team.
2666 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2667 }
2668 __kmp_free_thread(hot_team->t.t_threads[f]);
2669 hot_team->t.t_threads[f] = NULL;
2670 }
2671 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002672#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002673 if (thread->th.th_hot_teams) {
2674 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2675 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2676 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002677#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002678
Jonathan Peyton30419822017-05-12 18:01:32 +00002679 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002680
Jonathan Peyton30419822017-05-12 18:01:32 +00002681 // Update the t_nproc field in the threads that are still active.
2682 for (f = 0; f < new_nth; f++) {
2683 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2684 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002685 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002686 // Special flag in case omp_set_num_threads() call
2687 hot_team->t.t_size_changed = -1;
2688 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002689}
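// Illustrative usage sketch (assumption: this routine is reached through the
// standard OpenMP entry point for omp_set_num_threads, which is not in this
// file): the new value applies to later parallel regions that have no
// num_threads clause, and an idle root's hot team is trimmed right away rather
// than at the next fork:
//
//   omp_set_num_threads(2);   // may free hot-team threads [2 .. old_nproc-1] now
//   #pragma omp parallel      // subsequently forks with 2 threads
//   { /* ... */ }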
2690
Jim Cownie5e8470a2013-09-27 10:38:44 +00002691/* Changes max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002692void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2693 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002694
Jonathan Peyton30419822017-05-12 18:01:32 +00002695 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2696 "%d = (%d)\n",
2697 gtid, max_active_levels));
2698 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002699
Jonathan Peyton30419822017-05-12 18:01:32 +00002700 // validate max_active_levels
2701 if (max_active_levels < 0) {
2702 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2703 // We ignore this call if the user has specified a negative value.
2704 // The current setting won't be changed. The last valid setting will be
2705 // used. A warning will be issued (if warnings are allowed as controlled by
2706 // the KMP_WARNINGS env var).
2707 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2708 "max_active_levels for thread %d = (%d)\n",
2709 gtid, max_active_levels));
2710 return;
2711 }
2712 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2713 // it's OK, the max_active_levels is within the valid range: [ 0;
2714 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2715 // We allow a zero value. (implementation defined behavior)
2716 } else {
2717 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2718 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2719 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2720 // Current upper limit is MAX_INT. (implementation defined behavior)
2721 // If the input exceeds the upper limit, we correct the input to be the
2722 // upper limit. (implementation defined behavior)
2723 // In practice this branch cannot be reached while the limit is MAX_INT.
2724 }
2725 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2726 "max_active_levels for thread %d = (%d)\n",
2727 gtid, max_active_levels));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002728
Jonathan Peyton30419822017-05-12 18:01:32 +00002729 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002730
Jonathan Peyton30419822017-05-12 18:01:32 +00002731 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002732
Jonathan Peyton30419822017-05-12 18:01:32 +00002733 set__max_active_levels(thread, max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002734}
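// Illustrative sketch of the validation above (assumption: the standard
// omp_set_max_active_levels entry point forwards here):
//
//   omp_set_max_active_levels(-1);       // ignored: warning, previous value kept
//   omp_set_max_active_levels(0);        // accepted (zero is allowed,
//                                        // implementation defined behavior)
//   omp_set_max_active_levels(INT_MAX);  // accepted while the limit is MAX_INT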
2735
2736/* Gets max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002737int __kmp_get_max_active_levels(int gtid) {
2738 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002739
Jonathan Peyton30419822017-05-12 18:01:32 +00002740 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2741 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002742
Jonathan Peyton30419822017-05-12 18:01:32 +00002743 thread = __kmp_threads[gtid];
2744 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2745 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2746 "curtask_maxaclevel=%d\n",
2747 gtid, thread->th.th_current_task,
2748 thread->th.th_current_task->td_icvs.max_active_levels));
2749 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002750}
2751
2752/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
Jonathan Peyton30419822017-05-12 18:01:32 +00002753void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2754 kmp_info_t *thread;
2755 // kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002756
Jonathan Peyton30419822017-05-12 18:01:32 +00002757 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2758 gtid, (int)kind, chunk));
2759 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002760
Jonathan Peyton30419822017-05-12 18:01:32 +00002761 // Check if the kind parameter is valid, correct if needed.
2762 // Valid parameters should fit in one of two intervals - standard or extended:
2763 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2764 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2765 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2766 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2767 // TODO: Hint needs attention in case we change the default schedule.
2768 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2769 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2770 __kmp_msg_null);
2771 kind = kmp_sched_default;
2772 chunk = 0; // ignore chunk value in case of bad kind
2773 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002774
Jonathan Peyton30419822017-05-12 18:01:32 +00002775 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002776
Jonathan Peyton30419822017-05-12 18:01:32 +00002777 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002778
Jonathan Peyton30419822017-05-12 18:01:32 +00002779 if (kind < kmp_sched_upper_std) {
2780 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2781 // differ static chunked vs. unchunked: chunk should be invalid to
2782 // indicate unchunked schedule (which is the default)
2783 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002784 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002785 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2786 __kmp_sch_map[kind - kmp_sched_lower - 1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002787 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002788 } else {
2789 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2790 // kmp_sched_lower - 2 ];
2791 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2792 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2793 kmp_sched_lower - 2];
2794 }
Andrey Churbanovd454c732017-06-05 17:17:33 +00002795 if (kind == kmp_sched_auto || chunk < 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002796 // ignore parameter chunk for schedule auto
2797 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2798 } else {
2799 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2800 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002801}
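// Illustrative sketch (assumption: values arrive here through the standard
// omp_set_schedule entry point): an out-of-range kind falls back to the
// default kind with a warning, and a chunk below 1 (or kind auto) is replaced
// by KMP_DEFAULT_CHUNK, e.g.
//
//   omp_set_schedule(omp_sched_dynamic, 0);  // stored as dynamic with the
//                                            // default chunk size
//   omp_set_schedule(omp_sched_static, 7);   // stored as static chunked, chunk 7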
2802
2803/* Gets def_sched_var ICV values */
Jonathan Peyton30419822017-05-12 18:01:32 +00002804void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2805 kmp_info_t *thread;
2806 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002807
Jonathan Peyton30419822017-05-12 18:01:32 +00002808 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2809 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002810
Jonathan Peyton30419822017-05-12 18:01:32 +00002811 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002812
Jonathan Peyton30419822017-05-12 18:01:32 +00002813 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002814
Jonathan Peyton30419822017-05-12 18:01:32 +00002815 switch (th_type) {
2816 case kmp_sch_static:
2817 case kmp_sch_static_greedy:
2818 case kmp_sch_static_balanced:
2819 *kind = kmp_sched_static;
2820 *chunk = 0; // chunk was not set, try to show this fact via zero value
2821 return;
2822 case kmp_sch_static_chunked:
2823 *kind = kmp_sched_static;
2824 break;
2825 case kmp_sch_dynamic_chunked:
2826 *kind = kmp_sched_dynamic;
2827 break;
2828 case kmp_sch_guided_chunked:
2829 case kmp_sch_guided_iterative_chunked:
2830 case kmp_sch_guided_analytical_chunked:
2831 *kind = kmp_sched_guided;
2832 break;
2833 case kmp_sch_auto:
2834 *kind = kmp_sched_auto;
2835 break;
2836 case kmp_sch_trapezoidal:
2837 *kind = kmp_sched_trapezoidal;
2838 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002839#if KMP_STATIC_STEAL_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002840 case kmp_sch_static_steal:
2841 *kind = kmp_sched_static_steal;
2842 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002843#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002844 default:
2845 KMP_FATAL(UnknownSchedulingType, th_type);
2846 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002847
Jonathan Peyton30419822017-05-12 18:01:32 +00002848 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002849}
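// Illustrative sketch (not part of the runtime): the zero chunk reported for
// plain static above signals the unchunked (block-scheduled) variant, so a
// round trip looks like:
//
//   omp_set_schedule(omp_sched_static, 0);   // unchunked static
//   omp_get_schedule(&kind, &chunk);         // kind == static, chunk == 0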
2850
Jonathan Peyton30419822017-05-12 18:01:32 +00002851int __kmp_get_ancestor_thread_num(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002852
Jonathan Peyton30419822017-05-12 18:01:32 +00002853 int ii, dd;
2854 kmp_team_t *team;
2855 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002856
Jonathan Peyton30419822017-05-12 18:01:32 +00002857 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2858 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002859
Jonathan Peyton30419822017-05-12 18:01:32 +00002860 // validate level
2861 if (level == 0)
2862 return 0;
2863 if (level < 0)
2864 return -1;
2865 thr = __kmp_threads[gtid];
2866 team = thr->th.th_team;
2867 ii = team->t.t_level;
2868 if (level > ii)
2869 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002870
2871#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002872 if (thr->th.th_teams_microtask) {
2873 // AC: we are in teams region where multiple nested teams have same level
2874 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2875 if (level <=
2876 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2877 KMP_DEBUG_ASSERT(ii >= tlevel);
2878 // AC: As we need to pass by the teams league, we need to artificially
2879 // increase ii
2880 if (ii == tlevel) {
2881 ii += 2; // three teams have same level
2882 } else {
2883 ii++; // two teams have same level
2884 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002885 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002886 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002887#endif
2888
Jonathan Peyton30419822017-05-12 18:01:32 +00002889 if (ii == level)
2890 return __kmp_tid_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002891
Jonathan Peyton30419822017-05-12 18:01:32 +00002892 dd = team->t.t_serialized;
2893 level++;
2894 while (ii > level) {
2895 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002896 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002897 if ((team->t.t_serialized) && (!dd)) {
2898 team = team->t.t_parent;
2899 continue;
2900 }
2901 if (ii > level) {
2902 team = team->t.t_parent;
2903 dd = team->t.t_serialized;
2904 ii--;
2905 }
2906 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002907
Jonathan Peyton30419822017-05-12 18:01:32 +00002908 return (dd > 1) ? (0) : (team->t.t_master_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002909}
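// Illustrative sketch (not part of the runtime; assumes nested parallelism is
// enabled): inside a two-level nest,
//
//   #pragma omp parallel num_threads(4)     // level 1
//   #pragma omp parallel num_threads(2)     // level 2
//   {
//     omp_get_ancestor_thread_num(0);  // 0 (the initial thread)
//     omp_get_ancestor_thread_num(2);  // this thread's id in the inner team
//     omp_get_ancestor_thread_num(1);  // id, within the outer team, of the
//                                      // thread that forked the inner team
//     omp_get_ancestor_thread_num(3);  // -1 (deeper than the current nesting)
//   }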
2910
Jonathan Peyton30419822017-05-12 18:01:32 +00002911int __kmp_get_team_size(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002912
Jonathan Peyton30419822017-05-12 18:01:32 +00002913 int ii, dd;
2914 kmp_team_t *team;
2915 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002916
Jonathan Peyton30419822017-05-12 18:01:32 +00002917 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
2918 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002919
Jonathan Peyton30419822017-05-12 18:01:32 +00002920 // validate level
2921 if (level == 0)
2922 return 1;
2923 if (level < 0)
2924 return -1;
2925 thr = __kmp_threads[gtid];
2926 team = thr->th.th_team;
2927 ii = team->t.t_level;
2928 if (level > ii)
2929 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002930
2931#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002932 if (thr->th.th_teams_microtask) {
2933 // AC: we are in teams region where multiple nested teams have same level
2934 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2935 if (level <=
2936 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2937 KMP_DEBUG_ASSERT(ii >= tlevel);
2938 // AC: As we need to pass by the teams league, we need to artificially
2939 // increase ii
2940 if (ii == tlevel) {
2941 ii += 2; // three teams have same level
2942 } else {
2943 ii++; // two teams have same level
2944 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002945 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002946 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002947#endif
2948
Jonathan Peyton30419822017-05-12 18:01:32 +00002949 while (ii > level) {
2950 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002951 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002952 if (team->t.t_serialized && (!dd)) {
2953 team = team->t.t_parent;
2954 continue;
2955 }
2956 if (ii > level) {
2957 team = team->t.t_parent;
2958 ii--;
2959 }
2960 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002961
Jonathan Peyton30419822017-05-12 18:01:32 +00002962 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002963}
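// Illustrative sketch (not part of the runtime): in the same two-level nest
// sketched above, omp_get_team_size(0) == 1, omp_get_team_size(1) == 4,
// omp_get_team_size(2) == 2, and an out-of-range level returns -1.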
2964
Jonathan Peyton30419822017-05-12 18:01:32 +00002965kmp_r_sched_t __kmp_get_schedule_global() {
2966 // This routine was created because the pairs (__kmp_sched, __kmp_chunk) and
2967 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
2968 // independently. So one can get the updated schedule here.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002969
Jonathan Peyton30419822017-05-12 18:01:32 +00002970 kmp_r_sched_t r_sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002971
Jonathan Peyton30419822017-05-12 18:01:32 +00002972 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
2973 // __kmp_guided. __kmp_sched should keep its original value, so the user can set
2974 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
2975 // different roots (even in OMP 2.5)
2976 if (__kmp_sched == kmp_sch_static) {
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002977 // replace STATIC with more detailed schedule (balanced or greedy)
2978 r_sched.r_sched_type = __kmp_static;
Jonathan Peyton30419822017-05-12 18:01:32 +00002979 } else if (__kmp_sched == kmp_sch_guided_chunked) {
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002980 // replace GUIDED with more detailed schedule (iterative or analytical)
2981 r_sched.r_sched_type = __kmp_guided;
2982 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2983 r_sched.r_sched_type = __kmp_sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00002984 }
2985
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002986 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
2987 // __kmp_chunk may be wrong here (if it was not ever set)
Jonathan Peyton30419822017-05-12 18:01:32 +00002988 r_sched.chunk = KMP_DEFAULT_CHUNK;
2989 } else {
2990 r_sched.chunk = __kmp_chunk;
2991 }
2992
2993 return r_sched;
2994}
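// Illustrative sketch (assumption: OMP_SCHEDULE is parsed into __kmp_sched and
// __kmp_chunk in the usual way): with OMP_SCHEDULE="guided,4" the pair
// returned here is {__kmp_guided, 4}; with a bare OMP_SCHEDULE="guided" the
// chunk falls back to KMP_DEFAULT_CHUNK.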
2995
2996/* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
2997 at least argc number of *t_argv entries for the requested team. */
2998static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
2999
3000 KMP_DEBUG_ASSERT(team);
3001 if (!realloc || argc > team->t.t_max_argc) {
3002
3003 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3004 "current entries=%d\n",
3005 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3006 /* if previously allocated heap space for args, free them */
3007 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3008 __kmp_free((void *)team->t.t_argv);
3009
3010 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3011 /* use unused space in the cache line for arguments */
3012 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3013 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3014 "argv entries\n",
3015 team->t.t_id, team->t.t_max_argc));
3016 team->t.t_argv = &team->t.t_inline_argv[0];
3017 if (__kmp_storage_map) {
3018 __kmp_print_storage_map_gtid(
3019 -1, &team->t.t_inline_argv[0],
3020 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3021 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3022 team->t.t_id);
3023 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003024 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00003025 /* allocate space for arguments in the heap */
3026 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3027 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3028 : 2 * argc;
3029 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3030 "argv entries\n",
3031 team->t.t_id, team->t.t_max_argc));
3032 team->t.t_argv =
3033 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3034 if (__kmp_storage_map) {
3035 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3036 &team->t.t_argv[team->t.t_max_argc],
3037 sizeof(void *) * team->t.t_max_argc,
3038 "team_%d.t_argv", team->t.t_id);
3039 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003040 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003041 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003042}
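// Illustrative note on the sizing logic above (assuming KMP_INLINE_ARGV_ENTRIES
// is at least a handful of entries on the build in question): a parallel region
// with argc == 3 keeps t_argv pointing at the inline cache-line storage, while
// a microtask with argc above KMP_INLINE_ARGV_ENTRIES gets a heap block of at
// least 2 * argc entries, which is reused until a later region needs more.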
3043
Jonathan Peyton30419822017-05-12 18:01:32 +00003044static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3045 int i;
3046 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3047 team->t.t_threads =
3048 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3049 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3050 sizeof(dispatch_shared_info_t) * num_disp_buff);
3051 team->t.t_dispatch =
3052 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3053 team->t.t_implicit_task_taskdata =
3054 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3055 team->t.t_max_nproc = max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003056
Jonathan Peyton30419822017-05-12 18:01:32 +00003057 /* setup dispatch buffers */
3058 for (i = 0; i < num_disp_buff; ++i) {
3059 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003060#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003061 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003062#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003063 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003064}
3065
Jonathan Peyton30419822017-05-12 18:01:32 +00003066static void __kmp_free_team_arrays(kmp_team_t *team) {
3067 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3068 int i;
3069 for (i = 0; i < team->t.t_max_nproc; ++i) {
3070 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3071 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3072 team->t.t_dispatch[i].th_disp_buffer = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003073 }
3074 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003075 __kmp_free(team->t.t_threads);
3076 __kmp_free(team->t.t_disp_buffer);
3077 __kmp_free(team->t.t_dispatch);
3078 __kmp_free(team->t.t_implicit_task_taskdata);
3079 team->t.t_threads = NULL;
3080 team->t.t_disp_buffer = NULL;
3081 team->t.t_dispatch = NULL;
3082 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003083}
3084
Jonathan Peyton30419822017-05-12 18:01:32 +00003085static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3086 kmp_info_t **oldThreads = team->t.t_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003087
Jonathan Peyton30419822017-05-12 18:01:32 +00003088 __kmp_free(team->t.t_disp_buffer);
3089 __kmp_free(team->t.t_dispatch);
3090 __kmp_free(team->t.t_implicit_task_taskdata);
3091 __kmp_allocate_team_arrays(team, max_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003092
Jonathan Peyton30419822017-05-12 18:01:32 +00003093 KMP_MEMCPY(team->t.t_threads, oldThreads,
3094 team->t.t_nproc * sizeof(kmp_info_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003095
Jonathan Peyton30419822017-05-12 18:01:32 +00003096 __kmp_free(oldThreads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003097}
3098
Jonathan Peyton30419822017-05-12 18:01:32 +00003099static kmp_internal_control_t __kmp_get_global_icvs(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003100
Jonathan Peyton30419822017-05-12 18:01:32 +00003101 kmp_r_sched_t r_sched =
3102 __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003103
3104#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003105 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003106#endif /* OMP_40_ENABLED */
3107
Jonathan Peyton30419822017-05-12 18:01:32 +00003108 kmp_internal_control_t g_icvs = {
3109 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3110 (kmp_int8)__kmp_dflt_nested, // int nested; //internal control
3111 // for nested parallelism (per thread)
3112 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3113 // adjustment of threads (per thread)
3114 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3115 // whether blocktime is explicitly set
3116 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003117#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00003118 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3119// intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003120#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003121 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3122 // next parallel region (per thread)
3123 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3124 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3125 // for max_active_levels
3126 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3127// {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003128#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003129 __kmp_nested_proc_bind.bind_types[0],
3130 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003131#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00003132 NULL // struct kmp_internal_control *next;
3133 };
Jim Cownie5e8470a2013-09-27 10:38:44 +00003134
Jonathan Peyton30419822017-05-12 18:01:32 +00003135 return g_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003136}
3137
Jonathan Peyton30419822017-05-12 18:01:32 +00003138static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003139
Jonathan Peyton30419822017-05-12 18:01:32 +00003140 kmp_internal_control_t gx_icvs;
3141 gx_icvs.serial_nesting_level =
3142 0; // probably =team->t.t_serial like in save_inter_controls
3143 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3144 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003145
Jonathan Peyton30419822017-05-12 18:01:32 +00003146 return gx_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003147}
3148
Jonathan Peyton30419822017-05-12 18:01:32 +00003149static void __kmp_initialize_root(kmp_root_t *root) {
3150 int f;
3151 kmp_team_t *root_team;
3152 kmp_team_t *hot_team;
3153 int hot_team_max_nth;
3154 kmp_r_sched_t r_sched =
3155 __kmp_get_schedule_global(); // get current state of scheduling globals
3156 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3157 KMP_DEBUG_ASSERT(root);
3158 KMP_ASSERT(!root->r.r_begin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003159
Jonathan Peyton30419822017-05-12 18:01:32 +00003160 /* setup the root state structure */
3161 __kmp_init_lock(&root->r.r_begin_lock);
3162 root->r.r_begin = FALSE;
3163 root->r.r_active = FALSE;
3164 root->r.r_in_parallel = 0;
3165 root->r.r_blocktime = __kmp_dflt_blocktime;
3166 root->r.r_nested = __kmp_dflt_nested;
Jonathan Peytonf4392462017-07-27 20:58:41 +00003167 root->r.r_cg_nthreads = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003168
Jonathan Peyton30419822017-05-12 18:01:32 +00003169 /* setup the root team for this task */
3170 /* allocate the root team structure */
3171 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003172
Jonathan Peyton30419822017-05-12 18:01:32 +00003173 root_team =
3174 __kmp_allocate_team(root,
3175 1, // new_nproc
3176 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003177#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003178 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003179#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003180#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003181 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003182#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003183 &r_icvs,
3184 0 // argc
3185 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3186 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003187#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00003188 // Non-NULL value should be assigned to make the debugger display the root
3189 // team.
3190 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003191#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003192
Jonathan Peyton30419822017-05-12 18:01:32 +00003193 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003194
Jonathan Peyton30419822017-05-12 18:01:32 +00003195 root->r.r_root_team = root_team;
3196 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003197
Jonathan Peyton30419822017-05-12 18:01:32 +00003198 /* initialize root team */
3199 root_team->t.t_threads[0] = NULL;
3200 root_team->t.t_nproc = 1;
3201 root_team->t.t_serialized = 1;
3202 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003203 root_team->t.t_sched.sched = r_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003204 KA_TRACE(
3205 20,
3206 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3207 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003208
Jonathan Peyton30419822017-05-12 18:01:32 +00003209 /* setup the hot team for this task */
3210 /* allocate the hot team structure */
3211 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003212
Jonathan Peyton30419822017-05-12 18:01:32 +00003213 hot_team =
3214 __kmp_allocate_team(root,
3215 1, // new_nproc
3216 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003217#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003218 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003219#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003221 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003222#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003223 &r_icvs,
3224 0 // argc
3225 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3226 );
3227 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003228
Jonathan Peyton30419822017-05-12 18:01:32 +00003229 root->r.r_hot_team = hot_team;
3230 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003231
Jonathan Peyton30419822017-05-12 18:01:32 +00003232 /* first-time initialization */
3233 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003234
Jonathan Peyton30419822017-05-12 18:01:32 +00003235 /* initialize hot team */
3236 hot_team_max_nth = hot_team->t.t_max_nproc;
3237 for (f = 0; f < hot_team_max_nth; ++f) {
3238 hot_team->t.t_threads[f] = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003239 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003240 hot_team->t.t_nproc = 1;
3241 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003242 hot_team->t.t_sched.sched = r_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003243 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003244}
3245
3246#ifdef KMP_DEBUG
3247
Jim Cownie5e8470a2013-09-27 10:38:44 +00003248typedef struct kmp_team_list_item {
Jonathan Peyton30419822017-05-12 18:01:32 +00003249 kmp_team_p const *entry;
3250 struct kmp_team_list_item *next;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003251} kmp_team_list_item_t;
Jonathan Peyton30419822017-05-12 18:01:32 +00003252typedef kmp_team_list_item_t *kmp_team_list_t;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003253
Jonathan Peyton30419822017-05-12 18:01:32 +00003254static void __kmp_print_structure_team_accum( // Add team to list of teams.
3255 kmp_team_list_t list, // List of teams.
3256 kmp_team_p const *team // Team to add.
3257 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003258
Jonathan Peyton30419822017-05-12 18:01:32 +00003259 // List must terminate with an item where both entry and next are NULL.
3260 // Team is added to the list only once.
3261 // List is sorted in ascending order by team id.
3262 // Team id is *not* a key.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003263
Jonathan Peyton30419822017-05-12 18:01:32 +00003264 kmp_team_list_t l;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003265
Jonathan Peyton30419822017-05-12 18:01:32 +00003266 KMP_DEBUG_ASSERT(list != NULL);
3267 if (team == NULL) {
3268 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003269 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003270
Jonathan Peyton30419822017-05-12 18:01:32 +00003271 __kmp_print_structure_team_accum(list, team->t.t_parent);
3272 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003273
Jonathan Peyton30419822017-05-12 18:01:32 +00003274 // Search list for the team.
3275 l = list;
3276 while (l->next != NULL && l->entry != team) {
3277 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003278 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003279 if (l->next != NULL) {
3280 return; // Team has been added before, exit.
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003281 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003282
Jonathan Peyton30419822017-05-12 18:01:32 +00003283 // Team is not found. Search list again for insertion point.
3284 l = list;
3285 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3286 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003287 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003288
Jonathan Peyton30419822017-05-12 18:01:32 +00003289 // Insert team.
3290 {
3291 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3292 sizeof(kmp_team_list_item_t));
3293 *item = *l;
3294 l->entry = team;
3295 l->next = item;
3296 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003297}
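// Note on the idiom above (debug-only helper): the list ends with a sentinel
// item whose entry and next are both NULL, so a new team is inserted before
// node l by copying *l into a freshly allocated item and then overwriting l in
// place; this avoids tracking the previous node while walking the sorted list.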
3298
Jonathan Peyton30419822017-05-12 18:01:32 +00003299static void __kmp_print_structure_team(char const *title,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003300                                       kmp_team_p const *team) {
3302 __kmp_printf("%s", title);
3303 if (team != NULL) {
3304 __kmp_printf("%2x %p\n", team->t.t_id, team);
3305 } else {
3306 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003307 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003308}
3309
Jonathan Peyton30419822017-05-12 18:01:32 +00003310static void __kmp_print_structure_thread(char const *title,
3311 kmp_info_p const *thread) {
3312 __kmp_printf("%s", title);
3313 if (thread != NULL) {
3314 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3315 } else {
3316 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003317 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003318}
3319
Jonathan Peyton30419822017-05-12 18:01:32 +00003320void __kmp_print_structure(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003321
Jonathan Peyton30419822017-05-12 18:01:32 +00003322 kmp_team_list_t list;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003323
Jonathan Peyton30419822017-05-12 18:01:32 +00003324 // Initialize list of teams.
3325 list =
3326 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3327 list->entry = NULL;
3328 list->next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003329
Jonathan Peyton30419822017-05-12 18:01:32 +00003330 __kmp_printf("\n------------------------------\nGlobal Thread "
3331 "Table\n------------------------------\n");
3332 {
3333 int gtid;
3334 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3335 __kmp_printf("%2d", gtid);
3336 if (__kmp_threads != NULL) {
3337 __kmp_printf(" %p", __kmp_threads[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003338 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003339 if (__kmp_root != NULL) {
3340 __kmp_printf(" %p", __kmp_root[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003341 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003342 __kmp_printf("\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003343 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003344 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003345
Jonathan Peyton30419822017-05-12 18:01:32 +00003346 // Print out __kmp_threads array.
3347 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3348 "----------\n");
3349 if (__kmp_threads != NULL) {
3350 int gtid;
3351 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3352 kmp_info_t const *thread = __kmp_threads[gtid];
3353 if (thread != NULL) {
3354 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3355 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3356 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3357 __kmp_print_structure_team(" Serial Team: ",
3358 thread->th.th_serial_team);
3359 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3360 __kmp_print_structure_thread(" Master: ",
3361 thread->th.th_team_master);
3362 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3363 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003364#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003365 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003366#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003367 __kmp_print_structure_thread(" Next in pool: ",
3368 thread->th.th_next_pool);
3369 __kmp_printf("\n");
3370 __kmp_print_structure_team_accum(list, thread->th.th_team);
3371 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003372 }
3373 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003374 } else {
3375 __kmp_printf("Threads array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003376 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003377
Jonathan Peyton30419822017-05-12 18:01:32 +00003378 // Print out __kmp_root array.
3379 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3380 "--------\n");
3381 if (__kmp_root != NULL) {
3382 int gtid;
3383 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3384 kmp_root_t const *root = __kmp_root[gtid];
3385 if (root != NULL) {
3386 __kmp_printf("GTID %2d %p:\n", gtid, root);
3387 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3388 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3389 __kmp_print_structure_thread(" Uber Thread: ",
3390 root->r.r_uber_thread);
3391 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3392 __kmp_printf(" Nested?: %2d\n", root->r.r_nested);
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00003393 __kmp_printf(" In Parallel: %2d\n", KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
Jonathan Peyton30419822017-05-12 18:01:32 +00003394 __kmp_printf("\n");
3395 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3396 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003397 }
3398 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003399 } else {
3400 __kmp_printf("Ubers array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003401 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003402
Jonathan Peyton30419822017-05-12 18:01:32 +00003403 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3404 "--------\n");
3405 while (list->next != NULL) {
3406 kmp_team_p const *team = list->entry;
3407 int i;
3408 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3409 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3410 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
3411 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3412 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3413 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3414 for (i = 0; i < team->t.t_nproc; ++i) {
3415 __kmp_printf(" Thread %2d: ", i);
3416 __kmp_print_structure_thread("", team->t.t_threads[i]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003417 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003418 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3419 __kmp_printf("\n");
3420 list = list->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003421 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003422
Jonathan Peyton30419822017-05-12 18:01:32 +00003423 // Print out __kmp_thread_pool and __kmp_team_pool.
3424 __kmp_printf("\n------------------------------\nPools\n----------------------"
3425 "--------\n");
3426 __kmp_print_structure_thread("Thread pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003427 CCAST(kmp_info_t *, __kmp_thread_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003428 __kmp_print_structure_team("Team pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003429 CCAST(kmp_team_t *, __kmp_team_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003430 __kmp_printf("\n");
Jim Cownie5e8470a2013-09-27 10:38:44 +00003431
Jonathan Peyton30419822017-05-12 18:01:32 +00003432 // Free team list.
3433 while (list != NULL) {
3434 kmp_team_list_item_t *item = list;
3435 list = list->next;
3436 KMP_INTERNAL_FREE(item);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003437 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003438}
3439
3440#endif
3441
Jim Cownie5e8470a2013-09-27 10:38:44 +00003442//---------------------------------------------------------------------------
3443// Stuff for per-thread fast random number generator
3444// Table of primes
Jim Cownie5e8470a2013-09-27 10:38:44 +00003445static const unsigned __kmp_primes[] = {
Jonathan Peyton30419822017-05-12 18:01:32 +00003446 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3447 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3448 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3449 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3450 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3451 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3452 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3453 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3454 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3455 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3456 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
Jim Cownie5e8470a2013-09-27 10:38:44 +00003457
3458//---------------------------------------------------------------------------
3459// __kmp_get_random: Get a random number using a linear congruential method.
Jonathan Peyton30419822017-05-12 18:01:32 +00003460unsigned short __kmp_get_random(kmp_info_t *thread) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003461 unsigned x = thread->th.th_x;
Jonathan Peyton30419822017-05-12 18:01:32 +00003462 unsigned short r = x >> 16;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003463
Jonathan Peyton30419822017-05-12 18:01:32 +00003464 thread->th.th_x = x * thread->th.th_a + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003465
3466 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00003467 thread->th.th_info.ds.ds_tid, r));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003468
3469 return r;
3470}
3471//--------------------------------------------------------
3472// __kmp_init_random: Initialize a random number generator
Jonathan Peyton30419822017-05-12 18:01:32 +00003473void __kmp_init_random(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003474 unsigned seed = thread->th.th_info.ds.ds_tid;
3475
Jonathan Peyton30419822017-05-12 18:01:32 +00003476 thread->th.th_a =
3477 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3478 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3479 KA_TRACE(30,
3480 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003481}
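// Illustrative sketch (not compiled): the per-thread linear congruential
// generator implemented by __kmp_init_random()/__kmp_get_random() above.
// Each thread draws its multiplier 'a' from __kmp_primes by thread id, the
// state advances as x = a*x + 1 (mod 2^32 via unsigned wrap-around), and only
// the high 16 bits are returned since the low-order bits of an LCG are weak.
// The lcg_t/lcg_seed/lcg_next names are hypothetical.
#if 0
typedef struct { unsigned a, x; } lcg_t;
static void lcg_seed(lcg_t *g, unsigned tid, unsigned const *primes, int n) {
  g->a = primes[tid % n]; // distinct multipliers give distinct streams
  g->x = (tid + 1) * g->a + 1;
}
static unsigned short lcg_next(lcg_t *g) {
  unsigned short r = (unsigned short)(g->x >> 16);
  g->x = g->x * g->a + 1;
  return r;
}
#endif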
3482
Jim Cownie5e8470a2013-09-27 10:38:44 +00003483#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00003484/* reclaim array entries for root threads that are already dead, returns number
3485 * reclaimed */
3486static int __kmp_reclaim_dead_roots(void) {
3487 int i, r = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003488
Jonathan Peyton30419822017-05-12 18:01:32 +00003489 for (i = 0; i < __kmp_threads_capacity; ++i) {
3490 if (KMP_UBER_GTID(i) &&
3491 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3492 !__kmp_root[i]
3493 ->r.r_active) { // AC: reclaim only roots died in non-active state
3494 r += __kmp_unregister_root_other_thread(i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003495 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003496 }
3497 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003498}
3499#endif
3500
Jonathan Peyton30419822017-05-12 18:01:32 +00003501/* This function attempts to create free entries in __kmp_threads and
3502 __kmp_root, and returns the number of free entries generated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003503
Jonathan Peyton30419822017-05-12 18:01:32 +00003504 For Windows* OS static library, the first mechanism used is to reclaim array
3505 entries for root threads that are already dead.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003506
Jonathan Peyton30419822017-05-12 18:01:32 +00003507 On all platforms, expansion is attempted on the arrays __kmp_threads and
3508 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3509 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3510 threadprivate cache array has been created. Synchronization with
3511 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003512
Jonathan Peyton30419822017-05-12 18:01:32 +00003513 After any dead root reclamation, if the clipping value allows array expansion
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003514 to result in the generation of a total of nNeed free slots, the function does
3515 that expansion. If not, nothing is done beyond the possible initial root
3516 thread reclamation.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003517
Jonathan Peyton30419822017-05-12 18:01:32 +00003518 If any argument is negative, the behavior is undefined. */
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003519static int __kmp_expand_threads(int nNeed) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003520 int added = 0;
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003521 int minimumRequiredCapacity;
3522 int newCapacity;
3523 kmp_info_t **newThreads;
3524 kmp_root_t **newRoot;
3525
3526// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3527// resizing __kmp_threads does not need additional protection if foreign
3528// threads are present
Jim Cownie5e8470a2013-09-27 10:38:44 +00003529
Jonathan Peyton99016992015-05-26 17:32:53 +00003530#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00003531 /* only for Windows static library */
3532 /* reclaim array entries for root threads that are already dead */
3533 added = __kmp_reclaim_dead_roots();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003534
Jonathan Peyton30419822017-05-12 18:01:32 +00003535 if (nNeed) {
3536 nNeed -= added;
3537 if (nNeed < 0)
3538 nNeed = 0;
3539 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003540#endif
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003541 if (nNeed <= 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003542 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003543
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003544 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3545 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3546 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3547 // > __kmp_max_nth in one of two ways:
3548 //
3549 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3550 // may not be reused by another thread, so we may need to increase
3551 // __kmp_threads_capacity to __kmp_max_nth + 1.
3552 //
3553 // 2) New foreign root(s) are encountered. We always register new foreign
3554 // roots. This may cause a smaller # of threads to be allocated at
3555 // subsequent parallel regions, but the worker threads hang around (and
3556 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3557 //
3558 // Anyway, that is the reason for moving the check to see if
3559 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3560 // instead of having it performed here. -BB
Jonathan Peyton30419822017-05-12 18:01:32 +00003561
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003562 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
Jonathan Peyton30419822017-05-12 18:01:32 +00003563
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003564 /* compute expansion headroom to check if we can expand */
3565 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3566 /* possible expansion too small -- give up */
3567 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003568 }
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003569 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3570
3571 newCapacity = __kmp_threads_capacity;
3572 do {
3573 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3574 : __kmp_sys_max_nth;
3575 } while (newCapacity < minimumRequiredCapacity);
3576 newThreads = (kmp_info_t **)__kmp_allocate(
3577 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3578 newRoot =
3579 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3580 KMP_MEMCPY(newThreads, __kmp_threads,
3581 __kmp_threads_capacity * sizeof(kmp_info_t *));
3582 KMP_MEMCPY(newRoot, __kmp_root,
3583 __kmp_threads_capacity * sizeof(kmp_root_t *));
3584
3585 kmp_info_t **temp_threads = __kmp_threads;
3586 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3587 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3588 __kmp_free(temp_threads);
3589 added += newCapacity - __kmp_threads_capacity;
3590 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3591
3592 if (newCapacity > __kmp_tp_capacity) {
3593 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3594 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3595 __kmp_threadprivate_resize_cache(newCapacity);
3596 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3597 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3598 }
3599 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3600 }
3601
Jonathan Peyton30419822017-05-12 18:01:32 +00003602 return added;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003603}
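// Illustrative sketch (not compiled): the capacity-growth rule applied by
// __kmp_expand_threads() above. Capacity doubles until it covers the request
// and is clipped to the system-wide maximum; the headroom check above
// (sys_max - capacity >= need) guarantees the loop terminates. The
// grow_capacity name is hypothetical.
#if 0
static int grow_capacity(int capacity, int need, int sys_max) {
  int required = capacity + need;
  int new_cap = capacity;
  do {
    new_cap = (new_cap <= (sys_max >> 1)) ? (new_cap << 1) : sys_max;
  } while (new_cap < required);
  return new_cap; // e.g. capacity=64, need=70, sys_max=1024 -> 256
}
#endif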
3604
Jonathan Peyton30419822017-05-12 18:01:32 +00003605/* Register the current thread as a root thread and obtain our gtid. We must
3606 have the __kmp_initz_lock held at this point. Argument TRUE only if we are
3607 the thread that calls from __kmp_do_serial_initialize(). */
3608int __kmp_register_root(int initial_thread) {
3609 kmp_info_t *root_thread;
3610 kmp_root_t *root;
3611 int gtid;
3612 int capacity;
3613 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3614 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3615 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003616
Jonathan Peyton30419822017-05-12 18:01:32 +00003617 /* 2007-03-02:
3618 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3619 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3620 work as expected -- it may return false (that means there is at least one
3621 empty slot in __kmp_threads array), but it is possible the only free slot
3622 is #0, which is reserved for initial thread and so cannot be used for this
3623 one. The following code works around this bug.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003624
Jonathan Peyton30419822017-05-12 18:01:32 +00003625 However, the right solution seems to be to not reserve slot #0 for the
3626 initial thread because:
3627 (1) there is no magic in slot #0,
3628 (2) we cannot detect the initial thread reliably (the first thread which
3629 does serial initialization may not be a real initial thread).
3630 */
3631 capacity = __kmp_threads_capacity;
3632 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3633 --capacity;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003634 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003635
Jonathan Peyton30419822017-05-12 18:01:32 +00003636 /* see if there are too many threads */
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003637 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003638 if (__kmp_tp_cached) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003639 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3640 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3641 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00003642 } else {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003643 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3644 __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003645 }
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003646 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003647
3648 /* find an available thread slot */
3649 /* Don't reassign the zero slot since we need that to only be used by initial
3650 thread */
3651 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3652 gtid++)
3653 ;
3654 KA_TRACE(1,
3655 ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3656 KMP_ASSERT(gtid < __kmp_threads_capacity);
3657
3658 /* update global accounting */
3659 __kmp_all_nth++;
3660 TCW_4(__kmp_nth, __kmp_nth + 1);
3661
3662 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3663 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3664 if (__kmp_adjust_gtid_mode) {
3665 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3666 if (TCR_4(__kmp_gtid_mode) != 2) {
3667 TCW_4(__kmp_gtid_mode, 2);
3668 }
3669 } else {
3670 if (TCR_4(__kmp_gtid_mode) != 1) {
3671 TCW_4(__kmp_gtid_mode, 1);
3672 }
3673 }
3674 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003675
3676#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00003677 /* Adjust blocktime to zero if necessary */
3678 /* Middle initialization might not have occurred yet */
3679 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3680 if (__kmp_nth > __kmp_avail_proc) {
3681 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003682 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003683 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003684#endif /* KMP_ADJUST_BLOCKTIME */
3685
Jonathan Peyton30419822017-05-12 18:01:32 +00003686 /* setup this new hierarchy */
3687 if (!(root = __kmp_root[gtid])) {
3688 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3689 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3690 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003691
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003692#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003693 // Initialize stats as soon as possible (right after gtid assignment).
3694 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3695 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3696 KMP_SET_THREAD_STATE(SERIAL_REGION);
3697 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003698#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003699 __kmp_initialize_root(root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003700
Jonathan Peyton30419822017-05-12 18:01:32 +00003701 /* setup new root thread structure */
3702 if (root->r.r_uber_thread) {
3703 root_thread = root->r.r_uber_thread;
3704 } else {
3705 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3706 if (__kmp_storage_map) {
3707 __kmp_print_thread_storage_map(root_thread, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003708 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003709 root_thread->th.th_info.ds.ds_gtid = gtid;
Joachim Protze82e94a52017-11-01 10:08:30 +00003710#if OMPT_SUPPORT
3711 root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
3712#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003713 root_thread->th.th_root = root;
3714 if (__kmp_env_consistency_check) {
3715 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3716 }
3717#if USE_FAST_MEMORY
3718 __kmp_initialize_fast_memory(root_thread);
3719#endif /* USE_FAST_MEMORY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003720
Jonathan Peyton30419822017-05-12 18:01:32 +00003721#if KMP_USE_BGET
3722 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3723 __kmp_initialize_bget(root_thread);
3724#endif
3725 __kmp_init_random(root_thread); // Initialize random number generator
3726 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003727
Jonathan Peyton30419822017-05-12 18:01:32 +00003728 /* setup the serial team held in reserve by the root thread */
3729 if (!root_thread->th.th_serial_team) {
3730 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3731 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3732 root_thread->th.th_serial_team =
3733 __kmp_allocate_team(root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003734#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003735 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003736#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003737#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003738 proc_bind_default,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003739#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003740 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3741 }
3742 KMP_ASSERT(root_thread->th.th_serial_team);
3743 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3744 root_thread->th.th_serial_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003745
Jonathan Peyton30419822017-05-12 18:01:32 +00003746 /* drop root_thread into place */
3747 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003748
Jonathan Peyton30419822017-05-12 18:01:32 +00003749 root->r.r_root_team->t.t_threads[0] = root_thread;
3750 root->r.r_hot_team->t.t_threads[0] = root_thread;
3751 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3752 // AC: the team created in reserve, not for execution (it is unused for now).
3753 root_thread->th.th_serial_team->t.t_serialized = 0;
3754 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003755
Jonathan Peyton30419822017-05-12 18:01:32 +00003756 /* initialize the thread, get it ready to go */
3757 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3758 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003759
Jonathan Peyton30419822017-05-12 18:01:32 +00003760 /* prepare the master thread for get_gtid() */
3761 __kmp_gtid_set_specific(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003762
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003763#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003764 __kmp_itt_thread_name(gtid);
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003765#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003766
Jonathan Peyton30419822017-05-12 18:01:32 +00003767#ifdef KMP_TDATA_GTID
3768 __kmp_gtid = gtid;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003769#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003770 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3771 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3772
3773 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3774 "plain=%u\n",
3775 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3776 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3777 KMP_INIT_BARRIER_STATE));
3778 { // Initialize barrier data.
3779 int b;
3780 for (b = 0; b < bs_last_barrier; ++b) {
3781 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3782#if USE_DEBUGGER
3783 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3784#endif
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003785 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003786 }
3787 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3788 KMP_INIT_BARRIER_STATE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003789
Alp Toker763b9392014-02-28 09:42:41 +00003790#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00003791#if OMP_40_ENABLED
3792 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3793 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3794 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3795 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3796#endif
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003797
Jonathan Peyton30419822017-05-12 18:01:32 +00003798 if (TCR_4(__kmp_init_middle)) {
3799 __kmp_affinity_set_init_mask(gtid, TRUE);
3800 }
Alp Toker763b9392014-02-28 09:42:41 +00003801#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003802
Jonathan Peyton30419822017-05-12 18:01:32 +00003803 __kmp_root_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003804
Joachim Protze82e94a52017-11-01 10:08:30 +00003805#if OMPT_SUPPORT
3806 if (!initial_thread && ompt_enabled.enabled) {
3807
3808 ompt_thread_t *root_thread = ompt_get_thread();
3809
3810 ompt_set_thread_state(root_thread, omp_state_overhead);
3811
3812 if (ompt_enabled.ompt_callback_thread_begin) {
3813 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3814 ompt_thread_initial, __ompt_get_thread_data_internal());
3815 }
3816 ompt_data_t *task_data;
3817 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3818 if (ompt_enabled.ompt_callback_task_create) {
3819 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3820 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3821 // initial task has nothing to return to
3822 }
3823
3824 ompt_set_thread_state(root_thread, omp_state_work_serial);
3825 }
3826#endif
3827
Jonathan Peyton30419822017-05-12 18:01:32 +00003828 KMP_MB();
3829 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003830
Jonathan Peyton30419822017-05-12 18:01:32 +00003831 return gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003832}
3833
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003834#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003835static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3836 const int max_level) {
3837 int i, n, nth;
3838 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3839 if (!hot_teams || !hot_teams[level].hot_team) {
3840 return 0;
3841 }
3842 KMP_DEBUG_ASSERT(level < max_level);
3843 kmp_team_t *team = hot_teams[level].hot_team;
3844 nth = hot_teams[level].hot_team_nth;
3845 n = nth - 1; // master is not freed
3846 if (level < max_level - 1) {
3847 for (i = 0; i < nth; ++i) {
3848 kmp_info_t *th = team->t.t_threads[i];
3849 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3850 if (i > 0 && th->th.th_hot_teams) {
3851 __kmp_free(th->th.th_hot_teams);
3852 th->th.th_hot_teams = NULL;
3853 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003854 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003855 }
3856 __kmp_free_team(root, team, NULL);
3857 return n;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003858}
3859#endif
3860
Jonathan Peyton30419822017-05-12 18:01:32 +00003861// Resets a root thread and clears its root and hot teams.
3862// Returns the number of __kmp_threads entries directly and indirectly freed.
3863static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3864 kmp_team_t *root_team = root->r.r_root_team;
3865 kmp_team_t *hot_team = root->r.r_hot_team;
3866 int n = hot_team->t.t_nproc;
3867 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003868
Jonathan Peyton30419822017-05-12 18:01:32 +00003869 KMP_DEBUG_ASSERT(!root->r.r_active);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003870
Jonathan Peyton30419822017-05-12 18:01:32 +00003871 root->r.r_root_team = NULL;
3872 root->r.r_hot_team = NULL;
3873 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
3874 // before call to __kmp_free_team().
3875 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003876#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003877 if (__kmp_hot_teams_max_level >
3878 0) { // need to free nested hot teams and their threads if any
3879 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3880 kmp_info_t *th = hot_team->t.t_threads[i];
3881 if (__kmp_hot_teams_max_level > 1) {
3882 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3883 }
3884 if (th->th.th_hot_teams) {
3885 __kmp_free(th->th.th_hot_teams);
3886 th->th.th_hot_teams = NULL;
3887 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003888 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003889 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003890#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003891 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003892
Jonathan Peyton30419822017-05-12 18:01:32 +00003893 // Before we can reap the thread, we need to make certain that all other
3894 // threads in the teams that had this root as ancestor have stopped trying to
3895 // steal tasks.
3896 if (__kmp_tasking_mode != tskm_immediate_exec) {
3897 __kmp_wait_to_unref_task_teams();
3898 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003899
Jonathan Peyton30419822017-05-12 18:01:32 +00003900#if KMP_OS_WINDOWS
3901 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3902 KA_TRACE(
3903 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3904 "\n",
3905 (LPVOID) & (root->r.r_uber_thread->th),
3906 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3907 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3908#endif /* KMP_OS_WINDOWS */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003909
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003910#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003911 if (ompt_enabled.ompt_callback_thread_end) {
3912 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3913 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
Jonathan Peyton30419822017-05-12 18:01:32 +00003914 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003915#endif
3916
Jonathan Peyton30419822017-05-12 18:01:32 +00003917 TCW_4(__kmp_nth,
3918 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
Jonathan Peytonf4392462017-07-27 20:58:41 +00003919 root->r.r_cg_nthreads--;
3920
Jonathan Peyton30419822017-05-12 18:01:32 +00003921 __kmp_reap_thread(root->r.r_uber_thread, 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003922
Jonathan Peyton30419822017-05-12 18:01:32 +00003923 // We cannot put the root thread into __kmp_thread_pool, so we have to reap
3924 // it instead of freeing it.
3925 root->r.r_uber_thread = NULL;
3926 /* mark root as no longer in use */
3927 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003928
Jonathan Peyton30419822017-05-12 18:01:32 +00003929 return n;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003930}
3931
Jonathan Peyton30419822017-05-12 18:01:32 +00003932void __kmp_unregister_root_current_thread(int gtid) {
3933 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3934 /* this lock should be ok, since unregister_root_current_thread is never
3935 called during an abort, only during a normal close. furthermore, if you
3936 have the forkjoin lock, you should never try to get the initz lock */
3937 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3938 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3939 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
3940 "exiting T#%d\n",
3941 gtid));
3942 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3943 return;
3944 }
3945 kmp_root_t *root = __kmp_root[gtid];
Jim Cownie77c2a632014-09-03 11:34:33 +00003946
Jonathan Peyton30419822017-05-12 18:01:32 +00003947 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3948 KMP_ASSERT(KMP_UBER_GTID(gtid));
3949 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3950 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003951
Jonathan Peyton30419822017-05-12 18:01:32 +00003952 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003953
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003954#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003955 kmp_info_t *thread = __kmp_threads[gtid];
3956 kmp_team_t *team = thread->th.th_team;
3957 kmp_task_team_t *task_team = thread->th.th_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003958
Jonathan Peyton30419822017-05-12 18:01:32 +00003959 // we need to wait for the proxy tasks before finishing the thread
3960 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003961#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003962 // the runtime is shutting down so we won't report any events
Joachim Protze82e94a52017-11-01 10:08:30 +00003963 thread->th.ompt_thread_info.state = omp_state_undefined;
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003964#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003965 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3966 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003967#endif
3968
Jonathan Peyton30419822017-05-12 18:01:32 +00003969 __kmp_reset_root(gtid, root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003970
Jonathan Peyton30419822017-05-12 18:01:32 +00003971 /* free up this thread slot */
3972 __kmp_gtid_set_specific(KMP_GTID_DNE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003973#ifdef KMP_TDATA_GTID
Jonathan Peyton30419822017-05-12 18:01:32 +00003974 __kmp_gtid = KMP_GTID_DNE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003975#endif
3976
Jonathan Peyton30419822017-05-12 18:01:32 +00003977 KMP_MB();
3978 KC_TRACE(10,
3979 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003980
Jonathan Peyton30419822017-05-12 18:01:32 +00003981 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003982}
3983
Jonathan Peyton2321d572015-06-08 19:25:25 +00003984#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003985/* __kmp_forkjoin_lock must be already held
Jonathan Peyton30419822017-05-12 18:01:32 +00003986 Unregisters a root thread that is not the current thread. Returns the number
3987 of __kmp_threads entries freed as a result. */
3988static int __kmp_unregister_root_other_thread(int gtid) {
3989 kmp_root_t *root = __kmp_root[gtid];
3990 int r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003991
Jonathan Peyton30419822017-05-12 18:01:32 +00003992 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
3993 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3994 KMP_ASSERT(KMP_UBER_GTID(gtid));
3995 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3996 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003997
Jonathan Peyton30419822017-05-12 18:01:32 +00003998 r = __kmp_reset_root(gtid, root);
3999 KC_TRACE(10,
4000 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4001 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004002}
Jonathan Peyton2321d572015-06-08 19:25:25 +00004003#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004004
Jim Cownie5e8470a2013-09-27 10:38:44 +00004005#if KMP_DEBUG
4006void __kmp_task_info() {
4007
Jonathan Peyton30419822017-05-12 18:01:32 +00004008 kmp_int32 gtid = __kmp_entry_gtid();
4009 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4010 kmp_info_t *this_thr = __kmp_threads[gtid];
4011 kmp_team_t *steam = this_thr->th.th_serial_team;
4012 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004013
Jonathan Peyton30419822017-05-12 18:01:32 +00004014 __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
4015 "ptask=%p\n",
4016 gtid, tid, this_thr, team, this_thr->th.th_current_task,
4017 team->t.t_implicit_task_taskdata[tid].td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004018}
4019#endif // KMP_DEBUG
4020
Jonathan Peyton30419822017-05-12 18:01:32 +00004021/* TODO optimize with one big memclr, take out what isn't needed, split
4022 responsibility to workers as much as possible, and delay initialization of
4023 features as much as possible */
4024static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4025 int tid, int gtid) {
4026 /* this_thr->th.th_info.ds.ds_gtid is setup in
4027 kmp_allocate_thread/create_worker.
4028 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4029 kmp_info_t *master = team->t.t_threads[0];
4030 KMP_DEBUG_ASSERT(this_thr != NULL);
4031 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4032 KMP_DEBUG_ASSERT(team);
4033 KMP_DEBUG_ASSERT(team->t.t_threads);
4034 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4035 KMP_DEBUG_ASSERT(master);
4036 KMP_DEBUG_ASSERT(master->th.th_root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004037
Jonathan Peyton30419822017-05-12 18:01:32 +00004038 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004039
Jonathan Peyton30419822017-05-12 18:01:32 +00004040 TCW_SYNC_PTR(this_thr->th.th_team, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004041
Jonathan Peyton30419822017-05-12 18:01:32 +00004042 this_thr->th.th_info.ds.ds_tid = tid;
4043 this_thr->th.th_set_nproc = 0;
4044 if (__kmp_tasking_mode != tskm_immediate_exec)
4045 // When tasking is possible, threads are not safe to reap until they are
4046 // done tasking; this will be set when tasking code is exited in wait
4047 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4048 else // no tasking --> always safe to reap
4049 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004050#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004051 this_thr->th.th_set_proc_bind = proc_bind_default;
4052#if KMP_AFFINITY_SUPPORTED
4053 this_thr->th.th_new_place = this_thr->th.th_current_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004054#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004055#endif
4056 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004057
Jonathan Peyton30419822017-05-12 18:01:32 +00004058 /* setup the thread's cache of the team structure */
4059 this_thr->th.th_team_nproc = team->t.t_nproc;
4060 this_thr->th.th_team_master = master;
4061 this_thr->th.th_team_serialized = team->t.t_serialized;
4062 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004063
Jonathan Peyton30419822017-05-12 18:01:32 +00004064 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004065
Jonathan Peyton30419822017-05-12 18:01:32 +00004066 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4067 tid, gtid, this_thr, this_thr->th.th_current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004068
Jonathan Peyton30419822017-05-12 18:01:32 +00004069 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4070 team, tid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004071
Jonathan Peyton30419822017-05-12 18:01:32 +00004072 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4073 tid, gtid, this_thr, this_thr->th.th_current_task));
4074 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4075 // __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004076
Jonathan Peyton30419822017-05-12 18:01:32 +00004077 /* TODO no worksharing in speculative threads */
4078 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004079
Jonathan Peyton30419822017-05-12 18:01:32 +00004080 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004081
Jonathan Peyton30419822017-05-12 18:01:32 +00004082 if (!this_thr->th.th_pri_common) {
4083 this_thr->th.th_pri_common =
4084 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4085 if (__kmp_storage_map) {
4086 __kmp_print_storage_map_gtid(
4087 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4088 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004089 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004090 this_thr->th.th_pri_head = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004091 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004092
Jonathan Peyton30419822017-05-12 18:01:32 +00004093 /* Initialize dynamic dispatch */
4094 {
4095 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4096 // Use team max_nproc since this will never change for the team.
4097 size_t disp_size =
4098 sizeof(dispatch_private_info_t) *
4099 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4100 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4101 team->t.t_max_nproc));
4102 KMP_ASSERT(dispatch);
4103 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4104 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004105
Jonathan Peyton30419822017-05-12 18:01:32 +00004106 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004107#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004108 dispatch->th_doacross_buf_idx = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004109#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004110 if (!dispatch->th_disp_buffer) {
4111 dispatch->th_disp_buffer =
4112 (dispatch_private_info_t *)__kmp_allocate(disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004113
Jonathan Peyton30419822017-05-12 18:01:32 +00004114 if (__kmp_storage_map) {
4115 __kmp_print_storage_map_gtid(
4116 gtid, &dispatch->th_disp_buffer[0],
4117 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4118 ? 1
4119 : __kmp_dispatch_num_buffers],
4120 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4121 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4122 gtid, team->t.t_id, gtid);
4123 }
4124 } else {
4125 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004126 }
4127
Jonathan Peyton30419822017-05-12 18:01:32 +00004128 dispatch->th_dispatch_pr_current = 0;
4129 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004130
Jonathan Peyton30419822017-05-12 18:01:32 +00004131 dispatch->th_deo_fcn = 0; /* ORDERED */
4132 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4133 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004134
Jonathan Peyton30419822017-05-12 18:01:32 +00004135 this_thr->th.th_next_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004136
Jonathan Peyton30419822017-05-12 18:01:32 +00004137 if (!this_thr->th.th_task_state_memo_stack) {
4138 size_t i;
4139 this_thr->th.th_task_state_memo_stack =
4140 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4141 this_thr->th.th_task_state_top = 0;
4142 this_thr->th.th_task_state_stack_sz = 4;
4143 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4144 ++i) // zero init the stack
4145 this_thr->th.th_task_state_memo_stack[i] = 0;
4146 }
4147
4148 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4149 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4150
4151 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004152}
4153
Jonathan Peyton30419822017-05-12 18:01:32 +00004154/* allocate a new thread for the requesting team. this is only called from
4155 within a forkjoin critical section. we will first try to get an available
4156 thread from the thread pool. if none is available, we will fork a new one
4157 assuming we are able to create a new one. this should be assured, as the
4158 caller should check on this first. */
4159kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4160 int new_tid) {
4161 kmp_team_t *serial_team;
4162 kmp_info_t *new_thr;
4163 int new_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004164
Jonathan Peyton30419822017-05-12 18:01:32 +00004165 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4166 KMP_DEBUG_ASSERT(root && team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004167#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004168 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004169#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004170 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004171
Jonathan Peyton30419822017-05-12 18:01:32 +00004172 /* first, try to get one from the thread pool */
4173 if (__kmp_thread_pool) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004174
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004175 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00004176 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4177 if (new_thr == __kmp_thread_pool_insert_pt) {
4178 __kmp_thread_pool_insert_pt = NULL;
4179 }
4180 TCW_4(new_thr->th.th_in_pool, FALSE);
4181 // Don't touch th_active_in_pool or th_active.
4182 // The worker thread adjusts those flags as it sleeps/awakens.
4183 __kmp_thread_pool_nth--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004184
Jonathan Peyton30419822017-05-12 18:01:32 +00004185 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4186 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4187 KMP_ASSERT(!new_thr->th.th_team);
4188 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4189 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004190
Jonathan Peyton30419822017-05-12 18:01:32 +00004191 /* setup the thread structure */
4192 __kmp_initialize_info(new_thr, team, new_tid,
4193 new_thr->th.th_info.ds.ds_gtid);
4194 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004195
Jonathan Peyton30419822017-05-12 18:01:32 +00004196 TCW_4(__kmp_nth, __kmp_nth + 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00004197 root->r.r_cg_nthreads++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004198
Jonathan Peyton30419822017-05-12 18:01:32 +00004199 new_thr->th.th_task_state = 0;
4200 new_thr->th.th_task_state_top = 0;
4201 new_thr->th.th_task_state_stack_sz = 4;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004202
Jim Cownie5e8470a2013-09-27 10:38:44 +00004203#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00004204 /* Adjust blocktime back to zero if necessary */
4205 /* Middle initialization might not have occurred yet */
4206 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4207 if (__kmp_nth > __kmp_avail_proc) {
4208 __kmp_zero_bt = TRUE;
4209 }
4210 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004211#endif /* KMP_ADJUST_BLOCKTIME */
4212
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004213#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004214 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4215 // KMP_BARRIER_PARENT_FLAG.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004216 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004217 kmp_balign_t *balign = new_thr->th.th_bar;
4218 for (b = 0; b < bs_last_barrier; ++b)
4219 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004220#endif
4221
Jonathan Peyton30419822017-05-12 18:01:32 +00004222 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4223 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004224
Jim Cownie5e8470a2013-09-27 10:38:44 +00004225 KMP_MB();
4226 return new_thr;
Jonathan Peyton30419822017-05-12 18:01:32 +00004227 }
4228
4229 /* no, we'll fork a new one */
4230 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4231 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4232
4233#if KMP_USE_MONITOR
4234 // If this is the first worker thread the RTL is creating, then also
4235 // launch the monitor thread. We try to do this as early as possible.
4236 if (!TCR_4(__kmp_init_monitor)) {
4237 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4238 if (!TCR_4(__kmp_init_monitor)) {
4239 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4240 TCW_4(__kmp_init_monitor, 1);
4241 __kmp_create_monitor(&__kmp_monitor);
4242 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4243#if KMP_OS_WINDOWS
4244 // AC: wait until monitor has started. This is a fix for CQ232808.
4245 // The reason is that if the library is loaded/unloaded in a loop with
4246 // small (parallel) work in between, then there is high probability that
4247 // monitor thread started after the library shutdown. At shutdown it is
4248 // too late to cope with the problem, because when the master is in
4249 // DllMain (process detach) the monitor has no chances to start (it is
4250 // blocked), and master has no means to inform the monitor that the
4251 // library has gone, because all the memory which the monitor can access
4252 // is going to be released/reset.
4253 while (TCR_4(__kmp_init_monitor) < 2) {
4254 KMP_YIELD(TRUE);
4255 }
4256 KF_TRACE(10, ("after monitor thread has started\n"));
4257#endif
4258 }
4259 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4260 }
4261#endif
4262
4263 KMP_MB();
4264 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4265 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4266 }
4267
4268 /* allocate space for it. */
4269 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4270
4271 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4272
4273 if (__kmp_storage_map) {
4274 __kmp_print_thread_storage_map(new_thr, new_gtid);
4275 }
4276
4277 // add the reserve serialized team, initialized from the team's master thread
4278 {
4279 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4280 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4281 new_thr->th.th_serial_team = serial_team =
4282 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4283#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004284 ompt_data_none, // root parallel id
Jonathan Peyton30419822017-05-12 18:01:32 +00004285#endif
4286#if OMP_40_ENABLED
4287 proc_bind_default,
4288#endif
4289 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4290 }
4291 KMP_ASSERT(serial_team);
4292 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4293 // execution (it is unused for now).
4294 serial_team->t.t_threads[0] = new_thr;
4295 KF_TRACE(10,
4296 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4297 new_thr));
4298
4299 /* setup the thread structures */
4300 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4301
4302#if USE_FAST_MEMORY
4303 __kmp_initialize_fast_memory(new_thr);
4304#endif /* USE_FAST_MEMORY */
4305
4306#if KMP_USE_BGET
4307 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4308 __kmp_initialize_bget(new_thr);
4309#endif
4310
4311 __kmp_init_random(new_thr); // Initialize random number generator
4312
4313 /* Initialize these only once when thread is grabbed for a team allocation */
4314 KA_TRACE(20,
4315 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4316 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4317
4318 int b;
4319 kmp_balign_t *balign = new_thr->th.th_bar;
4320 for (b = 0; b < bs_last_barrier; ++b) {
4321 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4322 balign[b].bb.team = NULL;
4323 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4324 balign[b].bb.use_oncore_barrier = 0;
4325 }
4326
4327 new_thr->th.th_spin_here = FALSE;
4328 new_thr->th.th_next_waiting = 0;
4329
4330#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4331 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4332 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4333 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4334 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4335#endif
4336
4337 TCW_4(new_thr->th.th_in_pool, FALSE);
4338 new_thr->th.th_active_in_pool = FALSE;
4339 TCW_4(new_thr->th.th_active, TRUE);
4340
4341 /* adjust the global counters */
4342 __kmp_all_nth++;
4343 __kmp_nth++;
4344
Jonathan Peytonf4392462017-07-27 20:58:41 +00004345 root->r.r_cg_nthreads++;
4346
Jonathan Peyton30419822017-05-12 18:01:32 +00004347 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4348 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4349 if (__kmp_adjust_gtid_mode) {
4350 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4351 if (TCR_4(__kmp_gtid_mode) != 2) {
4352 TCW_4(__kmp_gtid_mode, 2);
4353 }
4354 } else {
4355 if (TCR_4(__kmp_gtid_mode) != 1) {
4356 TCW_4(__kmp_gtid_mode, 1);
4357 }
4358 }
4359 }
4360
4361#ifdef KMP_ADJUST_BLOCKTIME
4362 /* Adjust blocktime back to zero if necessary */
4363 /* Middle initialization might not have occurred yet */
4364 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4365 if (__kmp_nth > __kmp_avail_proc) {
4366 __kmp_zero_bt = TRUE;
4367 }
4368 }
4369#endif /* KMP_ADJUST_BLOCKTIME */
4370
4371 /* actually fork it and create the new worker thread */
4372 KF_TRACE(
4373 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4374 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4375 KF_TRACE(10,
4376 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4377
4378 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4379 new_gtid));
4380 KMP_MB();
4381 return new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004382}
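// Illustrative sketch (not compiled): the "reuse from the pool, else fork"
// shape of __kmp_allocate_thread() above. All names here (worker_t, pool_head,
// allocate_worker) are hypothetical stand-ins for the real pool and
// bookkeeping code; needs <stdlib.h> if built stand-alone.
#if 0
typedef struct worker { struct worker *next_in_pool; int bound_tid; } worker_t;
static worker_t *pool_head; // analogous to __kmp_thread_pool
static worker_t *allocate_worker(int tid) {
  worker_t *w = pool_head; // fast path: reuse a pooled, already-forked thread
  if (w != NULL) {
    pool_head = w->next_in_pool;
  } else {
    w = (worker_t *)calloc(1, sizeof(worker_t)); // slow path: "fork" a new one
    // ... one-time setup: barriers, RNG, fast memory, reserve serial team ...
  }
  w->bound_tid = tid; // per-use setup: bind to the requesting team/slot
  return w;
}
#endif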
4383
Jonathan Peyton30419822017-05-12 18:01:32 +00004384/* Reinitialize team for reuse.
4385 The hot team code calls this routine at every fork barrier, so the EPCC
4386 barrier tests are extremely sensitive to changes in it, esp. writes to the team
4387 struct, which cause a cache invalidation in all threads.
4388 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4389static void __kmp_reinitialize_team(kmp_team_t *team,
4390 kmp_internal_control_t *new_icvs,
4391 ident_t *loc) {
4392 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4393 team->t.t_threads[0], team));
4394 KMP_DEBUG_ASSERT(team && new_icvs);
4395 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4396 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004397
Jonathan Peyton30419822017-05-12 18:01:32 +00004398 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jonathan Peyton30419822017-05-12 18:01:32 +00004399 // Copy ICVs to the master thread's implicit taskdata
4400 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4401 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004402
Jonathan Peyton30419822017-05-12 18:01:32 +00004403 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4404 team->t.t_threads[0], team));
Jim Cownie181b4bb2013-12-23 17:28:57 +00004405}
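// Illustrative sketch (not compiled): the check-before-write idiom behind
// KMP_CHECK_UPDATE, used above because this path runs at every fork barrier.
// Stores that would not change the value are skipped, so the team struct's
// cache lines are not needlessly invalidated in every other thread's cache.
// CHECK_UPDATE is a hypothetical stand-in for the real macro.
#if 0
#define CHECK_UPDATE(dst, val)                                                 \
  do {                                                                         \
    if ((dst) != (val))                                                        \
      (dst) = (val); /* write only when the value actually changes */          \
  } while (0)
// Usage: CHECK_UPDATE(team->t.t_ident, loc);
#endif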
4406
Jonathan Peyton30419822017-05-12 18:01:32 +00004407/* Initialize the team data structure.
4408 This assumes the t_threads and t_max_nproc are already set.
4409 Also, we don't touch the arguments */
4410static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4411 kmp_internal_control_t *new_icvs,
4412 ident_t *loc) {
4413 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004414
Jonathan Peyton30419822017-05-12 18:01:32 +00004415 /* verify */
4416 KMP_DEBUG_ASSERT(team);
4417 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4418 KMP_DEBUG_ASSERT(team->t.t_threads);
4419 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004420
Jonathan Peyton30419822017-05-12 18:01:32 +00004421 team->t.t_master_tid = 0; /* not needed */
4422 /* team->t.t_master_bar; not needed */
4423 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4424 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004425
Jonathan Peyton30419822017-05-12 18:01:32 +00004426 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4427 team->t.t_next_pool = NULL;
4428 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4429 * up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004430
Jonathan Peyton30419822017-05-12 18:01:32 +00004431 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4432 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004433
Jonathan Peyton30419822017-05-12 18:01:32 +00004434 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00004435 team->t.t_sched.sched = new_icvs->sched.sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004436
4437#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00004438 team->t.t_fp_control_saved = FALSE; /* not needed */
4439 team->t.t_x87_fpu_control_word = 0; /* not needed */
4440 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004441#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4442
Jonathan Peyton30419822017-05-12 18:01:32 +00004443 team->t.t_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004444
Jonathan Peyton30419822017-05-12 18:01:32 +00004445 team->t.t_ordered.dt.t_value = 0;
4446 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004447
Jonathan Peyton30419822017-05-12 18:01:32 +00004448 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004449
4450#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004451 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004452#endif
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00004453#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00004454 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00004455#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004456
Jonathan Peyton30419822017-05-12 18:01:32 +00004457 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004458
Jonathan Peyton30419822017-05-12 18:01:32 +00004459 __kmp_reinitialize_team(team, new_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004460
Jonathan Peyton30419822017-05-12 18:01:32 +00004461 KMP_MB();
4462 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004463}
4464
Alp Toker98758b02014-03-02 04:12:06 +00004465#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004466/* Sets full mask for thread and returns old mask, no changes to structures. */
4467static void
Jonathan Peyton30419822017-05-12 18:01:32 +00004468__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4469 if (KMP_AFFINITY_CAPABLE()) {
4470 int status;
4471 if (old_mask != NULL) {
4472 status = __kmp_get_system_affinity(old_mask, TRUE);
4473 int error = errno;
4474 if (status != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00004475 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4476 __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00004477 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004478 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004479 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4480 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004481}
4482#endif
4483
Alp Toker98758b02014-03-02 04:12:06 +00004484#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004485
Jim Cownie5e8470a2013-09-27 10:38:44 +00004486// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4487// It calculats the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004488// thread's partition, and binds each worker to a thread in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004489// The master thread's partition should already include its current binding.
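// Illustrative sketch of the three policies (hypothetical numbers, not from
// the original source): with 8 places and the master bound to place 2 inside
// partition [0,7], proc_bind(master) pins every worker to place 2,
// proc_bind(close) assigns workers to places 3,4,5,... wrapping back to 0
// after 7, and proc_bind(spread) carves [0,7] into per-thread sub-partitions
// so nested teams later inherit disjoint place sets.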
Jonathan Peyton30419822017-05-12 18:01:32 +00004490static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4491 // Copy the master thread's place partion to the team struct
4492 kmp_info_t *master_th = team->t.t_threads[0];
4493 KMP_DEBUG_ASSERT(master_th != NULL);
4494 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4495 int first_place = master_th->th.th_first_place;
4496 int last_place = master_th->th.th_last_place;
4497 int masters_place = master_th->th.th_current_place;
4498 team->t.t_first_place = first_place;
4499 team->t.t_last_place = last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004500
Jonathan Peyton30419822017-05-12 18:01:32 +00004501 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4502 "bound to place %d partition = [%d,%d]\n",
4503 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4504 team->t.t_id, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004505
Jonathan Peyton30419822017-05-12 18:01:32 +00004506 switch (proc_bind) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004507
Jonathan Peyton30419822017-05-12 18:01:32 +00004508 case proc_bind_default:
4509 // serial teams might have the proc_bind policy set to proc_bind_default. It
 4510  // doesn't matter, as we don't rebind the master thread for any proc_bind policy
4511 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4512 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004513
Jonathan Peyton30419822017-05-12 18:01:32 +00004514 case proc_bind_master: {
4515 int f;
4516 int n_th = team->t.t_nproc;
4517 for (f = 1; f < n_th; f++) {
4518 kmp_info_t *th = team->t.t_threads[f];
4519 KMP_DEBUG_ASSERT(th != NULL);
4520 th->th.th_first_place = first_place;
4521 th->th.th_last_place = last_place;
4522 th->th.th_new_place = masters_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004523
Jonathan Peyton30419822017-05-12 18:01:32 +00004524 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
4525 "partition = [%d,%d]\n",
4526 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4527 f, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004528 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004529 } break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004530
Jonathan Peyton30419822017-05-12 18:01:32 +00004531 case proc_bind_close: {
4532 int f;
4533 int n_th = team->t.t_nproc;
4534 int n_places;
4535 if (first_place <= last_place) {
4536 n_places = last_place - first_place + 1;
4537 } else {
4538 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4539 }
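    // Example (illustrative, not from the original source): with
    // __kmp_affinity_num_masks == 8 and a wrapped partition first_place=6,
    // last_place=1, the partition covers places {6,7,0,1}, so
    // n_places = 8 - 6 + 1 + 1 = 4.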
4540 if (n_th <= n_places) {
4541 int place = masters_place;
4542 for (f = 1; f < n_th; f++) {
4543 kmp_info_t *th = team->t.t_threads[f];
4544 KMP_DEBUG_ASSERT(th != NULL);
4545
4546 if (place == last_place) {
4547 place = first_place;
4548 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4549 place = 0;
4550 } else {
4551 place++;
4552 }
4553 th->th.th_first_place = first_place;
4554 th->th.th_last_place = last_place;
4555 th->th.th_new_place = place;
4556
4557 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4558 "partition = [%d,%d]\n",
4559 __kmp_gtid_from_thread(team->t.t_threads[f]),
4560 team->t.t_id, f, place, first_place, last_place));
4561 }
4562 } else {
4563 int S, rem, gap, s_count;
4564 S = n_th / n_places;
4565 s_count = 0;
4566 rem = n_th - (S * n_places);
4567 gap = rem > 0 ? n_places / rem : n_places;
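      // Worked example (illustrative, not from the original source): n_th=10
      // threads over n_places=4 places gives S=2, rem=2, gap=2, i.e. every
      // second place takes one extra thread; starting at the master's place
      // the distribution comes out as 3,2,3,2.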
4568 int place = masters_place;
4569 int gap_ct = gap;
4570 for (f = 0; f < n_th; f++) {
4571 kmp_info_t *th = team->t.t_threads[f];
4572 KMP_DEBUG_ASSERT(th != NULL);
4573
4574 th->th.th_first_place = first_place;
4575 th->th.th_last_place = last_place;
4576 th->th.th_new_place = place;
4577 s_count++;
4578
4579 if ((s_count == S) && rem && (gap_ct == gap)) {
4580 // do nothing, add an extra thread to place on next iteration
4581 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4582 // we added an extra thread to this place; move to next place
4583 if (place == last_place) {
4584 place = first_place;
4585 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4586 place = 0;
4587 } else {
4588 place++;
4589 }
4590 s_count = 0;
4591 gap_ct = 1;
4592 rem--;
4593 } else if (s_count == S) { // place full; don't add extra
4594 if (place == last_place) {
4595 place = first_place;
4596 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4597 place = 0;
4598 } else {
4599 place++;
4600 }
4601 gap_ct++;
4602 s_count = 0;
4603 }
4604
4605 KA_TRACE(100,
4606 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4607 "partition = [%d,%d]\n",
4608 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4609 th->th.th_new_place, first_place, last_place));
4610 }
4611 KMP_DEBUG_ASSERT(place == masters_place);
4612 }
4613 } break;
4614
4615 case proc_bind_spread: {
4616 int f;
4617 int n_th = team->t.t_nproc;
4618 int n_places;
4619 int thidx;
4620 if (first_place <= last_place) {
4621 n_places = last_place - first_place + 1;
4622 } else {
4623 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4624 }
4625 if (n_th <= n_places) {
Paul Osmialowskia0162792017-08-10 23:04:11 +00004626 int place = -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004627
Paul Osmialowskia0162792017-08-10 23:04:11 +00004628 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4629 int S = n_places / n_th;
4630 int s_count, rem, gap, gap_ct;
4631
4632 place = masters_place;
4633 rem = n_places - n_th * S;
4634 gap = rem ? n_th / rem : 1;
4635 gap_ct = gap;
4636 thidx = n_th;
4637 if (update_master_only == 1)
4638 thidx = 1;
4639 for (f = 0; f < thidx; f++) {
4640 kmp_info_t *th = team->t.t_threads[f];
4641 KMP_DEBUG_ASSERT(th != NULL);
4642
4643 th->th.th_first_place = place;
4644 th->th.th_new_place = place;
4645 s_count = 1;
4646 while (s_count < S) {
4647 if (place == last_place) {
4648 place = first_place;
4649 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4650 place = 0;
4651 } else {
4652 place++;
4653 }
4654 s_count++;
4655 }
4656 if (rem && (gap_ct == gap)) {
4657 if (place == last_place) {
4658 place = first_place;
4659 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4660 place = 0;
4661 } else {
4662 place++;
4663 }
4664 rem--;
4665 gap_ct = 0;
4666 }
4667 th->th.th_last_place = place;
4668 gap_ct++;
4669
Jonathan Peyton30419822017-05-12 18:01:32 +00004670 if (place == last_place) {
4671 place = first_place;
4672 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4673 place = 0;
4674 } else {
4675 place++;
4676 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004677
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004678 KA_TRACE(100,
4679 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4680 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4681 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4682 f, th->th.th_new_place, th->th.th_first_place,
4683 th->th.th_last_place, __kmp_affinity_num_masks));
Jonathan Peyton30419822017-05-12 18:01:32 +00004684 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004685 } else {
 4686      /* Given a uniform space of available computation places, we can create
 4687         T partitions of round(P/T) size and put each thread into the first
 4688         place of its partition. */
4689 double current = static_cast<double>(masters_place);
4690 double spacing =
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004691 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
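      // Worked example (illustrative, not from the original source): with
      // n_places=8 uniform places, n_th=4 and masters_place=0,
      // spacing = (8+1)/4 = 2.25 and the loop below yields the partitions
      // [0,1], [2,3], [4,5], [6,7] (the last one clamped to n_places-1),
      // each thread being pinned to the first place of its partition.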
Paul Osmialowskia0162792017-08-10 23:04:11 +00004692 int first, last;
4693 kmp_info_t *th;
4694
4695 thidx = n_th + 1;
4696 if (update_master_only == 1)
4697 thidx = 1;
4698 for (f = 0; f < thidx; f++) {
4699 first = static_cast<int>(current);
4700 last = static_cast<int>(current + spacing) - 1;
4701 KMP_DEBUG_ASSERT(last >= first);
4702 if (first >= n_places) {
4703 if (masters_place) {
4704 first -= n_places;
4705 last -= n_places;
4706 if (first == (masters_place + 1)) {
4707 KMP_DEBUG_ASSERT(f == n_th);
4708 first--;
4709 }
4710 if (last == masters_place) {
4711 KMP_DEBUG_ASSERT(f == (n_th - 1));
4712 last--;
4713 }
4714 } else {
4715 KMP_DEBUG_ASSERT(f == n_th);
4716 first = 0;
4717 last = 0;
4718 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004719 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004720 if (last >= n_places) {
4721 last = (n_places - 1);
4722 }
4723 place = first;
4724 current += spacing;
4725 if (f < n_th) {
4726 KMP_DEBUG_ASSERT(0 <= first);
4727 KMP_DEBUG_ASSERT(n_places > first);
4728 KMP_DEBUG_ASSERT(0 <= last);
4729 KMP_DEBUG_ASSERT(n_places > last);
4730 KMP_DEBUG_ASSERT(last_place >= first_place);
4731 th = team->t.t_threads[f];
4732 KMP_DEBUG_ASSERT(th);
4733 th->th.th_first_place = first;
4734 th->th.th_new_place = place;
4735 th->th.th_last_place = last;
Jonathan Peyton30419822017-05-12 18:01:32 +00004736
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004737 KA_TRACE(100,
4738 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4739 "partition = [%d,%d], spacing = %.4f\n",
4740 __kmp_gtid_from_thread(team->t.t_threads[f]),
4741 team->t.t_id, f, th->th.th_new_place,
4742 th->th.th_first_place, th->th.th_last_place, spacing));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004743 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004744 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004745 }
4746 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4747 } else {
4748 int S, rem, gap, s_count;
4749 S = n_th / n_places;
4750 s_count = 0;
4751 rem = n_th - (S * n_places);
4752 gap = rem > 0 ? n_places / rem : n_places;
4753 int place = masters_place;
4754 int gap_ct = gap;
4755 thidx = n_th;
4756 if (update_master_only == 1)
4757 thidx = 1;
4758 for (f = 0; f < thidx; f++) {
4759 kmp_info_t *th = team->t.t_threads[f];
4760 KMP_DEBUG_ASSERT(th != NULL);
4761
4762 th->th.th_first_place = place;
4763 th->th.th_last_place = place;
4764 th->th.th_new_place = place;
4765 s_count++;
4766
4767 if ((s_count == S) && rem && (gap_ct == gap)) {
4768 // do nothing, add an extra thread to place on next iteration
4769 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4770 // we added an extra thread to this place; move on to next place
4771 if (place == last_place) {
4772 place = first_place;
4773 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4774 place = 0;
4775 } else {
4776 place++;
4777 }
4778 s_count = 0;
4779 gap_ct = 1;
4780 rem--;
4781 } else if (s_count == S) { // place is full; don't add extra thread
4782 if (place == last_place) {
4783 place = first_place;
4784 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4785 place = 0;
4786 } else {
4787 place++;
4788 }
4789 gap_ct++;
4790 s_count = 0;
4791 }
4792
4793 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4794 "partition = [%d,%d]\n",
4795 __kmp_gtid_from_thread(team->t.t_threads[f]),
4796 team->t.t_id, f, th->th.th_new_place,
4797 th->th.th_first_place, th->th.th_last_place));
4798 }
4799 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4800 }
4801 } break;
4802
4803 default:
4804 break;
4805 }
4806
4807 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004808}
4809
Alp Toker98758b02014-03-02 04:12:06 +00004810#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004811
Jonathan Peyton30419822017-05-12 18:01:32 +00004812/* allocate a new team data structure to use. take one off of the free pool if
4813 available */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004814kmp_team_t *
Jonathan Peyton30419822017-05-12 18:01:32 +00004815__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004816#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004817 ompt_data_t ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004818#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004819#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004820 kmp_proc_bind_t new_proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00004821#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004822 kmp_internal_control_t *new_icvs,
4823 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4824 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4825 int f;
4826 kmp_team_t *team;
4827 int use_hot_team = !root->r.r_active;
4828 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004829
Jonathan Peyton30419822017-05-12 18:01:32 +00004830 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4831 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4832 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4833 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004834
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004835#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004836 kmp_hot_team_ptr_t *hot_teams;
4837 if (master) {
4838 team = master->th.th_team;
4839 level = team->t.t_active_level;
4840 if (master->th.th_teams_microtask) { // in teams construct?
4841 if (master->th.th_teams_size.nteams > 1 &&
4842 ( // #teams > 1
4843 team->t.t_pkfn ==
4844 (microtask_t)__kmp_teams_master || // inner fork of the teams
4845 master->th.th_teams_level <
4846 team->t.t_level)) { // or nested parallel inside the teams
 4847        ++level; // do not increment if #teams==1, or for outer fork of the teams;
4848 // increment otherwise
4849 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004850 }
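    // Illustrative reading of the check above (an interpretation, not from the
    // original source): for a teams construct with num_teams > 1, the outer
    // fork that creates the league leaves level untouched, while the fork
    // inside each team (t_pkfn == __kmp_teams_master) or a parallel region
    // nested inside the teams region bumps level by one, so the proper
    // per-level hot team is selected below.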
Jonathan Peyton30419822017-05-12 18:01:32 +00004851 hot_teams = master->th.th_hot_teams;
4852 if (level < __kmp_hot_teams_max_level && hot_teams &&
4853 hot_teams[level]
4854 .hot_team) { // hot team has already been allocated for given level
4855 use_hot_team = 1;
4856 } else {
4857 use_hot_team = 0;
4858 }
4859 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004860#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004861 // Optimization to use a "hot" team
4862 if (use_hot_team && new_nproc > 1) {
4863 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004864#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004865 team = hot_teams[level].hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004866#else
Jonathan Peyton30419822017-05-12 18:01:32 +00004867 team = root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004868#endif
4869#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004870 if (__kmp_tasking_mode != tskm_immediate_exec) {
4871 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
4872 "task_team[1] = %p before reinit\n",
4873 team->t.t_task_team[0], team->t.t_task_team[1]));
4874 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004875#endif
4876
Jonathan Peyton30419822017-05-12 18:01:32 +00004877 // Has the number of threads changed?
4878 /* Let's assume the most common case is that the number of threads is
4879 unchanged, and put that case first. */
4880 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4881 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
4882 // This case can mean that omp_set_num_threads() was called and the hot
Jonathan Peyton642688b2017-06-01 16:46:36 +00004883 // team size was already reduced, so we check the special flag
Jonathan Peyton30419822017-05-12 18:01:32 +00004884 if (team->t.t_size_changed == -1) {
4885 team->t.t_size_changed = 1;
4886 } else {
4887 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4888 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004889
Jonathan Peyton30419822017-05-12 18:01:32 +00004890 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4891 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00004892 // set master's schedule as new run-time schedule
4893 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004894
Jonathan Peyton30419822017-05-12 18:01:32 +00004895 __kmp_reinitialize_team(team, new_icvs,
4896 root->r.r_uber_thread->th.th_ident);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004897
Jonathan Peyton30419822017-05-12 18:01:32 +00004898 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4899 team->t.t_threads[0], team));
4900 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004901
4902#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004903#if KMP_AFFINITY_SUPPORTED
4904 if ((team->t.t_size_changed == 0) &&
4905 (team->t.t_proc_bind == new_proc_bind)) {
4906 if (new_proc_bind == proc_bind_spread) {
4907 __kmp_partition_places(
4908 team, 1); // add flag to update only master for spread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004909 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004910 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
4911 "proc_bind = %d, partition = [%d,%d]\n",
4912 team->t.t_id, new_proc_bind, team->t.t_first_place,
4913 team->t.t_last_place));
4914 } else {
4915 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4916 __kmp_partition_places(team);
4917 }
4918#else
4919 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4920#endif /* KMP_AFFINITY_SUPPORTED */
4921#endif /* OMP_40_ENABLED */
4922 } else if (team->t.t_nproc > new_nproc) {
4923 KA_TRACE(20,
4924 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
4925 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004926
Jonathan Peyton30419822017-05-12 18:01:32 +00004927 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004928#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004929 if (__kmp_hot_teams_mode == 0) {
4930 // AC: saved number of threads should correspond to team's value in this
4931 // mode, can be bigger in mode 1, when hot team has threads in reserve
4932 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4933 hot_teams[level].hot_team_nth = new_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004934#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004935 /* release the extra threads we don't need any more */
4936 for (f = new_nproc; f < team->t.t_nproc; f++) {
4937 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4938 if (__kmp_tasking_mode != tskm_immediate_exec) {
4939 // When decreasing team size, threads no longer in the team should
4940 // unref task team.
4941 team->t.t_threads[f]->th.th_task_team = NULL;
4942 }
4943 __kmp_free_thread(team->t.t_threads[f]);
4944 team->t.t_threads[f] = NULL;
4945 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004946#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004947 } // (__kmp_hot_teams_mode == 0)
4948 else {
4949 // When keeping extra threads in team, switch threads to wait on own
4950 // b_go flag
4951 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4952 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4953 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4954 for (int b = 0; b < bs_last_barrier; ++b) {
4955 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4956 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004957 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004958 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4959 }
4960 }
4961 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004962#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004963 team->t.t_nproc = new_nproc;
4964 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00004965 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
Jonathan Peyton30419822017-05-12 18:01:32 +00004966 __kmp_reinitialize_team(team, new_icvs,
4967 root->r.r_uber_thread->th.th_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004968
Jonathan Peyton30419822017-05-12 18:01:32 +00004969 /* update the remaining threads */
4970 for (f = 0; f < new_nproc; ++f) {
4971 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4972 }
4973 // restore the current task state of the master thread: should be the
4974 // implicit task
4975 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
4976 team->t.t_threads[0], team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004977
Jonathan Peyton30419822017-05-12 18:01:32 +00004978 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004979
4980#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004981 for (f = 0; f < team->t.t_nproc; f++) {
4982 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
4983 team->t.t_threads[f]->th.th_team_nproc ==
4984 team->t.t_nproc);
4985 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004986#endif
4987
4988#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004989 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4990#if KMP_AFFINITY_SUPPORTED
4991 __kmp_partition_places(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004992#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004993#endif
4994 } else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004995#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00004996 kmp_affin_mask_t *old_mask;
4997 if (KMP_AFFINITY_CAPABLE()) {
4998 KMP_CPU_ALLOC(old_mask);
4999 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005000#endif
5001
Jonathan Peyton30419822017-05-12 18:01:32 +00005002 KA_TRACE(20,
5003 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5004 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005005
Jonathan Peyton30419822017-05-12 18:01:32 +00005006 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005007
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005008#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005009 int avail_threads = hot_teams[level].hot_team_nth;
5010 if (new_nproc < avail_threads)
5011 avail_threads = new_nproc;
5012 kmp_info_t **other_threads = team->t.t_threads;
5013 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5014 // Adjust barrier data of reserved threads (if any) of the team
5015 // Other data will be set in __kmp_initialize_info() below.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005016 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00005017 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5018 for (b = 0; b < bs_last_barrier; ++b) {
5019 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5020 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005021#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00005022 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005023#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005024 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005025 }
5026 if (hot_teams[level].hot_team_nth >= new_nproc) {
5027 // we have all needed threads in reserve, no need to allocate any
 5028        // this is only possible in mode 1; mode 0 cannot have reserved threads
5029 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5030 team->t.t_nproc = new_nproc; // just get reserved threads involved
5031 } else {
5032 // we may have some threads in reserve, but not enough
5033 team->t.t_nproc =
5034 hot_teams[level]
5035 .hot_team_nth; // get reserved threads involved if any
5036 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5037#endif // KMP_NESTED_HOT_TEAMS
5038 if (team->t.t_max_nproc < new_nproc) {
5039 /* reallocate larger arrays */
5040 __kmp_reallocate_team_arrays(team, new_nproc);
5041 __kmp_reinitialize_team(team, new_icvs, NULL);
5042 }
5043
5044#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5045 /* Temporarily set full mask for master thread before creation of
 5046         workers. The reason is that workers inherit the affinity from the master,
 5047         so if a lot of workers are created on a single core quickly, they
5048 don't get a chance to set their own affinity for a long time. */
5049 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5050#endif
5051
5052 /* allocate new threads for the hot team */
5053 for (f = team->t.t_nproc; f < new_nproc; f++) {
5054 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5055 KMP_DEBUG_ASSERT(new_worker);
5056 team->t.t_threads[f] = new_worker;
5057
5058 KA_TRACE(20,
5059 ("__kmp_allocate_team: team %d init T#%d arrived: "
5060 "join=%llu, plain=%llu\n",
5061 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5062 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5063 team->t.t_bar[bs_plain_barrier].b_arrived));
5064
5065 { // Initialize barrier data for new threads.
5066 int b;
5067 kmp_balign_t *balign = new_worker->th.th_bar;
5068 for (b = 0; b < bs_last_barrier; ++b) {
5069 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5070 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5071 KMP_BARRIER_PARENT_FLAG);
5072#if USE_DEBUGGER
5073 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5074#endif
5075 }
5076 }
5077 }
5078
5079#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5080 if (KMP_AFFINITY_CAPABLE()) {
5081 /* Restore initial master thread's affinity mask */
5082 __kmp_set_system_affinity(old_mask, TRUE);
5083 KMP_CPU_FREE(old_mask);
5084 }
5085#endif
5086#if KMP_NESTED_HOT_TEAMS
5087 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5088#endif // KMP_NESTED_HOT_TEAMS
 5089    /* make sure everyone is synchronized */
5090 int old_nproc = team->t.t_nproc; // save old value and use to update only
5091 // new threads below
5092 __kmp_initialize_team(team, new_nproc, new_icvs,
5093 root->r.r_uber_thread->th.th_ident);
5094
5095 /* reinitialize the threads */
5096 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5097 for (f = 0; f < team->t.t_nproc; ++f)
5098 __kmp_initialize_info(team->t.t_threads[f], team, f,
5099 __kmp_gtid_from_tid(f, team));
5100 if (level) { // set th_task_state for new threads in nested hot team
5101 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5102 // only need to set the th_task_state for the new threads. th_task_state
5103 // for master thread will not be accurate until after this in
5104 // __kmp_fork_call(), so we look to the master's memo_stack to get the
5105 // correct value.
5106 for (f = old_nproc; f < team->t.t_nproc; ++f)
5107 team->t.t_threads[f]->th.th_task_state =
5108 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5109 } else { // set th_task_state for new threads in non-nested hot team
5110 int old_state =
5111 team->t.t_threads[0]->th.th_task_state; // copy master's state
5112 for (f = old_nproc; f < team->t.t_nproc; ++f)
5113 team->t.t_threads[f]->th.th_task_state = old_state;
5114 }
5115
5116#ifdef KMP_DEBUG
5117 for (f = 0; f < team->t.t_nproc; ++f) {
5118 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5119 team->t.t_threads[f]->th.th_team_nproc ==
5120 team->t.t_nproc);
5121 }
5122#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005123
5124#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005125 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5126#if KMP_AFFINITY_SUPPORTED
5127 __kmp_partition_places(team);
5128#endif
5129#endif
5130 } // Check changes in number of threads
5131
5132#if OMP_40_ENABLED
5133 kmp_info_t *master = team->t.t_threads[0];
5134 if (master->th.th_teams_microtask) {
5135 for (f = 1; f < new_nproc; ++f) {
5136 // propagate teams construct specific info to workers
5137 kmp_info_t *thr = team->t.t_threads[f];
5138 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5139 thr->th.th_teams_level = master->th.th_teams_level;
5140 thr->th.th_teams_size = master->th.th_teams_size;
5141 }
5142 }
5143#endif /* OMP_40_ENABLED */
5144#if KMP_NESTED_HOT_TEAMS
5145 if (level) {
5146 // Sync barrier state for nested hot teams, not needed for outermost hot
5147 // team.
5148 for (f = 1; f < new_nproc; ++f) {
5149 kmp_info_t *thr = team->t.t_threads[f];
5150 int b;
5151 kmp_balign_t *balign = thr->th.th_bar;
5152 for (b = 0; b < bs_last_barrier; ++b) {
5153 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5154 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5155#if USE_DEBUGGER
5156 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5157#endif
5158 }
5159 }
5160 }
5161#endif // KMP_NESTED_HOT_TEAMS
5162
5163 /* reallocate space for arguments if necessary */
5164 __kmp_alloc_argv_entries(argc, team, TRUE);
5165 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5166 // The hot team re-uses the previous task team,
5167 // if untouched during the previous release->gather phase.
5168
5169 KF_TRACE(10, (" hot_team = %p\n", team));
5170
5171#if KMP_DEBUG
5172 if (__kmp_tasking_mode != tskm_immediate_exec) {
5173 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5174 "task_team[1] = %p after reinit\n",
5175 team->t.t_task_team[0], team->t.t_task_team[1]));
5176 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005177#endif
5178
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005179#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005180 __ompt_team_assign_id(team, ompt_parallel_data);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005181#endif
5182
Jim Cownie5e8470a2013-09-27 10:38:44 +00005183 KMP_MB();
5184
Jim Cownie5e8470a2013-09-27 10:38:44 +00005185 return team;
Jonathan Peyton30419822017-05-12 18:01:32 +00005186 }
5187
5188 /* next, let's try to take one from the team pool */
5189 KMP_MB();
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005190 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005191 /* TODO: consider resizing undersized teams instead of reaping them, now
5192 that we have a resizing mechanism */
5193 if (team->t.t_max_nproc >= max_nproc) {
5194 /* take this team from the team pool */
5195 __kmp_team_pool = team->t.t_next_pool;
5196
5197 /* setup the team for fresh use */
5198 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5199
5200 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5201 "task_team[1] %p to NULL\n",
5202 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5203 team->t.t_task_team[0] = NULL;
5204 team->t.t_task_team[1] = NULL;
5205
5206 /* reallocate space for arguments if necessary */
5207 __kmp_alloc_argv_entries(argc, team, TRUE);
5208 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5209
5210 KA_TRACE(
5211 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5212 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5213 { // Initialize barrier data.
5214 int b;
5215 for (b = 0; b < bs_last_barrier; ++b) {
5216 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5217#if USE_DEBUGGER
5218 team->t.t_bar[b].b_master_arrived = 0;
5219 team->t.t_bar[b].b_team_arrived = 0;
5220#endif
5221 }
5222 }
5223
5224#if OMP_40_ENABLED
5225 team->t.t_proc_bind = new_proc_bind;
5226#endif
5227
5228 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5229 team->t.t_id));
5230
5231#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005232 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005233#endif
5234
5235 KMP_MB();
5236
5237 return team;
5238 }
5239
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005240 /* reap team if it is too small, then loop back and check the next one */
 5241    // not sure if this is wise, but it will be redone during the hot-teams
5242 // rewrite.
5243 /* TODO: Use technique to find the right size hot-team, don't reap them */
Jonathan Peyton30419822017-05-12 18:01:32 +00005244 team = __kmp_reap_team(team);
5245 __kmp_team_pool = team;
5246 }
5247
5248 /* nothing available in the pool, no matter, make a new team! */
5249 KMP_MB();
5250 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5251
5252 /* and set it up */
5253 team->t.t_max_nproc = max_nproc;
5254 /* NOTE well, for some reason allocating one big buffer and dividing it up
5255 seems to really hurt performance a lot on the P4, so, let's not use this */
5256 __kmp_allocate_team_arrays(team, max_nproc);
5257
5258 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5259 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5260
5261 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5262 "%p to NULL\n",
5263 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5264 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5265 // memory, no need to duplicate
5266 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5267 // memory, no need to duplicate
5268
5269 if (__kmp_storage_map) {
5270 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5271 }
5272
5273 /* allocate space for arguments */
5274 __kmp_alloc_argv_entries(argc, team, FALSE);
5275 team->t.t_argc = argc;
5276
5277 KA_TRACE(20,
5278 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5279 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5280 { // Initialize barrier data.
5281 int b;
5282 for (b = 0; b < bs_last_barrier; ++b) {
5283 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5284#if USE_DEBUGGER
5285 team->t.t_bar[b].b_master_arrived = 0;
5286 team->t.t_bar[b].b_team_arrived = 0;
5287#endif
5288 }
5289 }
5290
5291#if OMP_40_ENABLED
5292 team->t.t_proc_bind = new_proc_bind;
5293#endif
5294
5295#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005296 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005297 team->t.ompt_serialized_team_info = NULL;
5298#endif
5299
5300 KMP_MB();
5301
5302 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5303 team->t.t_id));
5304
5305 return team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005306}
5307
5308/* TODO implement hot-teams at all levels */
5309/* TODO implement lazy thread release on demand (disband request) */
5310
5311/* free the team. return it to the team pool. release all the threads
5312 * associated with it */
Jonathan Peyton30419822017-05-12 18:01:32 +00005313void __kmp_free_team(kmp_root_t *root,
5314 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5315 int f;
5316 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5317 team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005318
Jonathan Peyton30419822017-05-12 18:01:32 +00005319 /* verify state */
5320 KMP_DEBUG_ASSERT(root);
5321 KMP_DEBUG_ASSERT(team);
5322 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5323 KMP_DEBUG_ASSERT(team->t.t_threads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005324
Jonathan Peyton30419822017-05-12 18:01:32 +00005325 int use_hot_team = team == root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005326#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005327 int level;
5328 kmp_hot_team_ptr_t *hot_teams;
5329 if (master) {
5330 level = team->t.t_active_level - 1;
5331 if (master->th.th_teams_microtask) { // in teams construct?
5332 if (master->th.th_teams_size.nteams > 1) {
5333 ++level; // level was not increased in teams construct for
5334 // team_of_masters
5335 }
5336 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5337 master->th.th_teams_level == team->t.t_level) {
5338 ++level; // level was not increased in teams construct for
5339 // team_of_workers before the parallel
5340 } // team->t.t_level will be increased inside parallel
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005341 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005342 hot_teams = master->th.th_hot_teams;
5343 if (level < __kmp_hot_teams_max_level) {
5344 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5345 use_hot_team = 1;
5346 }
5347 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005348#endif // KMP_NESTED_HOT_TEAMS
5349
Jonathan Peyton30419822017-05-12 18:01:32 +00005350 /* team is done working */
5351 TCW_SYNC_PTR(team->t.t_pkfn,
5352 NULL); // Important for Debugging Support Library.
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005353#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005354 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005355#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005356 // Do not reset pointer to parent team to NULL for hot teams.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005357
Jonathan Peyton30419822017-05-12 18:01:32 +00005358 /* if we are non-hot team, release our threads */
5359 if (!use_hot_team) {
5360 if (__kmp_tasking_mode != tskm_immediate_exec) {
5361 // Wait for threads to reach reapable state
5362 for (f = 1; f < team->t.t_nproc; ++f) {
5363 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5364 kmp_info_t *th = team->t.t_threads[f];
5365 volatile kmp_uint32 *state = &th->th.th_reap_state;
5366 while (*state != KMP_SAFE_TO_REAP) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005367#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005368 // On Windows a thread can be killed at any time, check this
5369 DWORD ecode;
5370 if (!__kmp_is_thread_alive(th, &ecode)) {
5371 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5372 break;
5373 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005374#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005375 // first check if thread is sleeping
5376 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5377 if (fl.is_sleeping())
5378 fl.resume(__kmp_gtid_from_thread(th));
5379 KMP_CPU_PAUSE();
5380 }
5381 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005382
Jonathan Peyton30419822017-05-12 18:01:32 +00005383 // Delete task teams
5384 int tt_idx;
5385 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5386 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5387 if (task_team != NULL) {
5388 for (f = 0; f < team->t.t_nproc;
5389 ++f) { // Have all threads unref task teams
5390 team->t.t_threads[f]->th.th_task_team = NULL;
5391 }
5392 KA_TRACE(
5393 20,
5394 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5395 __kmp_get_gtid(), task_team, team->t.t_id));
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005396#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005397 __kmp_free_task_team(master, task_team);
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005398#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005399 team->t.t_task_team[tt_idx] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005400 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005401 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005402 }
5403
Jonathan Peyton30419822017-05-12 18:01:32 +00005404 // Reset pointer to parent team only for non-hot teams.
5405 team->t.t_parent = NULL;
5406 team->t.t_level = 0;
5407 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005408
Jonathan Peyton30419822017-05-12 18:01:32 +00005409 /* free the worker threads */
5410 for (f = 1; f < team->t.t_nproc; ++f) {
5411 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5412 __kmp_free_thread(team->t.t_threads[f]);
5413 team->t.t_threads[f] = NULL;
5414 }
5415
5416 /* put the team back in the team pool */
5417 /* TODO limit size of team pool, call reap_team if pool too large */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005418 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005419 __kmp_team_pool = (volatile kmp_team_t *)team;
5420 }
5421
5422 KMP_MB();
5423}
Jim Cownie5e8470a2013-09-27 10:38:44 +00005424
5425/* reap the team. destroy it, reclaim all its resources and free its memory */
Jonathan Peyton30419822017-05-12 18:01:32 +00005426kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5427 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005428
Jonathan Peyton30419822017-05-12 18:01:32 +00005429 KMP_DEBUG_ASSERT(team);
5430 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5431 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5432 KMP_DEBUG_ASSERT(team->t.t_threads);
5433 KMP_DEBUG_ASSERT(team->t.t_argv);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005434
Jonathan Peyton30419822017-05-12 18:01:32 +00005435 /* TODO clean the threads that are a part of this? */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005436
Jonathan Peyton30419822017-05-12 18:01:32 +00005437 /* free stuff */
5438 __kmp_free_team_arrays(team);
5439 if (team->t.t_argv != &team->t.t_inline_argv[0])
5440 __kmp_free((void *)team->t.t_argv);
5441 __kmp_free(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005442
Jonathan Peyton30419822017-05-12 18:01:32 +00005443 KMP_MB();
5444 return next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005445}
5446
Jim Cownie5e8470a2013-09-27 10:38:44 +00005447// Free the thread. Don't reap it, just place it on the pool of available
5448// threads.
5449//
5450// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5451// binding for the affinity mechanism to be useful.
5452//
5453// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5454// However, we want to avoid a potential performance problem by always
5455// scanning through the list to find the correct point at which to insert
5456// the thread (potential N**2 behavior). To do this we keep track of the
5457// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5458// With single-level parallelism, threads will always be added to the tail
5459// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5460// parallelism, all bets are off and we may need to scan through the entire
5461// free list.
5462//
5463// This change also has a potentially large performance benefit, for some
5464// applications. Previously, as threads were freed from the hot team, they
5465// would be placed back on the free list in inverse order. If the hot team
 5466// grew back to its original size, then the freed thread would be placed
5467// back on the hot team in reverse order. This could cause bad cache
5468// locality problems on programs where the size of the hot team regularly
5469// grew and shrunk.
5470//
5471// Now, for single-level parallelism, the OMP tid is alway == gtid.
Jonathan Peyton30419822017-05-12 18:01:32 +00005472void __kmp_free_thread(kmp_info_t *this_th) {
5473 int gtid;
5474 kmp_info_t **scan;
Jonathan Peytonf4392462017-07-27 20:58:41 +00005475 kmp_root_t *root = this_th->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005476
Jonathan Peyton30419822017-05-12 18:01:32 +00005477 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5478 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005479
Jonathan Peyton30419822017-05-12 18:01:32 +00005480 KMP_DEBUG_ASSERT(this_th);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005481
Jonathan Peyton30419822017-05-12 18:01:32 +00005482 // When moving thread to pool, switch thread to wait on own b_go flag, and
5483 // uninitialized (NULL team).
5484 int b;
5485 kmp_balign_t *balign = this_th->th.th_bar;
5486 for (b = 0; b < bs_last_barrier; ++b) {
5487 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5488 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5489 balign[b].bb.team = NULL;
5490 balign[b].bb.leaf_kids = 0;
5491 }
5492 this_th->th.th_task_state = 0;
Andrey Churbanov3336aa02018-03-19 18:05:15 +00005493 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
Jonathan Peyton30419822017-05-12 18:01:32 +00005494
5495 /* put thread back on the free pool */
5496 TCW_PTR(this_th->th.th_team, NULL);
5497 TCW_PTR(this_th->th.th_root, NULL);
5498 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5499
Jonathan Peytonbff8ded2018-01-10 18:24:09 +00005500 /* If the implicit task assigned to this thread can be used by other threads
5501 * -> multiple threads can share the data and try to free the task at
5502 * __kmp_reap_thread at exit. This duplicate use of the task data can happen
 5503   * with higher probability when the hot team is disabled, but can occur even when
5504 * the hot team is enabled */
5505 __kmp_free_implicit_task(this_th);
5506 this_th->th.th_current_task = NULL;
5507
Jonathan Peyton30419822017-05-12 18:01:32 +00005508 // If the __kmp_thread_pool_insert_pt is already past the new insert
5509 // point, then we need to re-scan the entire list.
5510 gtid = this_th->th.th_info.ds.ds_gtid;
5511 if (__kmp_thread_pool_insert_pt != NULL) {
5512 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5513 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5514 __kmp_thread_pool_insert_pt = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005515 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005516 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005517
Jonathan Peyton30419822017-05-12 18:01:32 +00005518 // Scan down the list to find the place to insert the thread.
5519 // scan is the address of a link in the list, possibly the address of
5520 // __kmp_thread_pool itself.
5521 //
 5522  // In the absence of nested parallelism, the for loop will have 0 iterations.
5523 if (__kmp_thread_pool_insert_pt != NULL) {
5524 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5525 } else {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005526 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005527 }
5528 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5529 scan = &((*scan)->th.th_next_pool))
5530 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005531
Jonathan Peyton30419822017-05-12 18:01:32 +00005532 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5533 // to its address.
5534 TCW_PTR(this_th->th.th_next_pool, *scan);
5535 __kmp_thread_pool_insert_pt = *scan = this_th;
5536 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5537 (this_th->th.th_info.ds.ds_gtid <
5538 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5539 TCW_4(this_th->th.th_in_pool, TRUE);
5540 __kmp_thread_pool_nth++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005541
Jonathan Peyton30419822017-05-12 18:01:32 +00005542 TCW_4(__kmp_nth, __kmp_nth - 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00005543 root->r.r_cg_nthreads--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005544
5545#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005546 /* Adjust blocktime back to user setting or default if necessary */
5547 /* Middle initialization might never have occurred */
5548 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5549 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5550 if (__kmp_nth <= __kmp_avail_proc) {
5551 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005552 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005553 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005554#endif /* KMP_ADJUST_BLOCKTIME */
5555
Jonathan Peyton30419822017-05-12 18:01:32 +00005556 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005557}
5558
Jim Cownie5e8470a2013-09-27 10:38:44 +00005559/* ------------------------------------------------------------------------ */
5560
Jonathan Peyton30419822017-05-12 18:01:32 +00005561void *__kmp_launch_thread(kmp_info_t *this_thr) {
5562 int gtid = this_thr->th.th_info.ds.ds_gtid;
5563 /* void *stack_data;*/
5564 kmp_team_t *(*volatile pteam);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005565
Jonathan Peyton30419822017-05-12 18:01:32 +00005566 KMP_MB();
5567 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005568
Jonathan Peyton30419822017-05-12 18:01:32 +00005569 if (__kmp_env_consistency_check) {
5570 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5571 }
5572
5573#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005574 ompt_data_t *thread_data;
5575 if (ompt_enabled.enabled) {
5576 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5577 thread_data->ptr = NULL;
5578
5579 this_thr->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005580 this_thr->th.ompt_thread_info.wait_id = 0;
Joachim Protze82e94a52017-11-01 10:08:30 +00005581 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5582 if (ompt_enabled.ompt_callback_thread_begin) {
5583 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5584 ompt_thread_worker, thread_data);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005585 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005586 }
5587#endif
5588
Joachim Protze82e94a52017-11-01 10:08:30 +00005589#if OMPT_SUPPORT
5590 if (ompt_enabled.enabled) {
5591 this_thr->th.ompt_thread_info.state = omp_state_idle;
5592 }
5593#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005594 /* This is the place where threads wait for work */
5595 while (!TCR_4(__kmp_global.g.g_done)) {
5596 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5597 KMP_MB();
5598
5599 /* wait for work to do */
5600 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005601
Jonathan Peyton30419822017-05-12 18:01:32 +00005602 /* No tid yet since not part of a team */
5603 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5604
5605#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005606 if (ompt_enabled.enabled) {
5607 this_thr->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005608 }
5609#endif
5610
5611 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5612
5613 /* have we been allocated? */
5614 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005615 /* we were just woken up, so run our new task */
5616 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5617 int rc;
5618 KA_TRACE(20,
5619 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5620 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5621 (*pteam)->t.t_pkfn));
5622
5623 updateHWFPControl(*pteam);
5624
5625#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005626 if (ompt_enabled.enabled) {
5627 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00005628 }
5629#endif
5630
5631 {
5632 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5633 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5634 rc = (*pteam)->t.t_invoke(gtid);
5635 }
5636 KMP_ASSERT(rc);
5637
Jim Cownie5e8470a2013-09-27 10:38:44 +00005638 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00005639 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5640 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5641 (*pteam)->t.t_pkfn));
5642 }
Joachim Protze82e94a52017-11-01 10:08:30 +00005643#if OMPT_SUPPORT
5644 if (ompt_enabled.enabled) {
5645 /* no frame set while outside task */
Joachim Protzec255ca72017-11-05 14:11:10 +00005646 __ompt_get_task_info_object(0)->frame.exit_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00005647
5648 this_thr->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005649 }
5650#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00005651 /* join barrier after parallel region */
5652 __kmp_join_barrier(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005653 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005654 }
5655 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005656
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005657#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005658 if (ompt_enabled.ompt_callback_thread_end) {
5659 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005660 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005661#endif
5662
Jonathan Peyton30419822017-05-12 18:01:32 +00005663 this_thr->th.th_task_team = NULL;
5664 /* run the destructors for the threadprivate data for this thread */
5665 __kmp_common_destroy_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005666
Jonathan Peyton30419822017-05-12 18:01:32 +00005667 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5668 KMP_MB();
5669 return this_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005670}
5671
5672/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005673
Jonathan Peyton30419822017-05-12 18:01:32 +00005674void __kmp_internal_end_dest(void *specific_gtid) {
5675#if KMP_COMPILER_ICC
5676#pragma warning(push)
5677#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5678// significant bits
5679#endif
5680 // Make sure no significant bits are lost
5681 int gtid = (kmp_intptr_t)specific_gtid - 1;
5682#if KMP_COMPILER_ICC
5683#pragma warning(pop)
5684#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005685
Jonathan Peyton30419822017-05-12 18:01:32 +00005686 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
 5687  /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
5688 * this is because 0 is reserved for the nothing-stored case */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005689
Jonathan Peyton30419822017-05-12 18:01:32 +00005690 /* josh: One reason for setting the gtid specific data even when it is being
5691 destroyed by pthread is to allow gtid lookup through thread specific data
5692 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5693 that gets executed in the call to __kmp_internal_end_thread, actually
5694 gets the gtid through the thread specific data. Setting it here seems
5695 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5696 to run smoothly.
5697 todo: get rid of this after we remove the dependence on
5698 __kmp_gtid_get_specific */
5699 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5700 __kmp_gtid_set_specific(gtid);
5701#ifdef KMP_TDATA_GTID
5702 __kmp_gtid = gtid;
5703#endif
5704 __kmp_internal_end_thread(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005705}
5706
Jonathan Peyton99016992015-05-26 17:32:53 +00005707#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005708
Jonathan Peyton30419822017-05-12 18:01:32 +00005709// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases
5710// destructors work perfectly, but in real libomp.so I have no evidence it is
5711// ever called. However, -fini linker option in makefile.mk works fine.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005712
Jonathan Peyton30419822017-05-12 18:01:32 +00005713__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5714 __kmp_internal_end_atexit();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005715}
5716
Jonathan Peyton30419822017-05-12 18:01:32 +00005717void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005718
5719#endif
5720
Jonathan Peyton30419822017-05-12 18:01:32 +00005721/* [Windows] josh: when the atexit handler is called, there may still be more
5722 than one thread alive */
5723void __kmp_internal_end_atexit(void) {
5724 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5725 /* [Windows]
5726 josh: ideally, we want to completely shutdown the library in this atexit
5727 handler, but stat code that depends on thread specific data for gtid fails
5728 because that data becomes unavailable at some point during the shutdown, so
5729 we call __kmp_internal_end_thread instead. We should eventually remove the
5730 dependency on __kmp_get_specific_gtid in the stat code and use
5731 __kmp_internal_end_library to cleanly shut down the library.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005732
Jonathan Peyton30419822017-05-12 18:01:32 +00005733 // TODO: Can some of this comment about GVS be removed?
5734 I suspect that the offending stat code is executed when the calling thread
5735 tries to clean up a dead root thread's data structures, resulting in GVS
5736 code trying to close the GVS structures for that thread, but since the stat
5737 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
5738 the calling thread is cleaning up itself instead of another thread, it gets
5739 confused. This happens because allowing a thread to unregister and clean up
5740 another thread is a recent modification for addressing an issue.
5741 Based on the current design (20050722), a thread may end up
5742 trying to unregister another thread only if thread death does not trigger
5743 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
5744 thread specific data destructor function to detect thread death. For
5745 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
5746 is nothing. Thus, the workaround is applicable only for Windows static
5747 stat library. */
5748 __kmp_internal_end_library(-1);
5749#if KMP_OS_WINDOWS
5750 __kmp_close_console();
5751#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005752}
5753
Jonathan Peyton30419822017-05-12 18:01:32 +00005754static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5755 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005756
Jonathan Peyton30419822017-05-12 18:01:32 +00005757 int gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005758
Jonathan Peyton30419822017-05-12 18:01:32 +00005759 KMP_DEBUG_ASSERT(thread != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005760
Jonathan Peyton30419822017-05-12 18:01:32 +00005761 gtid = thread->th.th_info.ds.ds_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005762
Jonathan Peyton30419822017-05-12 18:01:32 +00005763 if (!is_root) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005764
Jonathan Peyton30419822017-05-12 18:01:32 +00005765 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5766 /* Assume the threads are at the fork barrier here */
5767 KA_TRACE(
5768 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5769 gtid));
5770 /* Need release fence here to prevent seg faults for tree forkjoin barrier
5771 * (GEH) */
5772 ANNOTATE_HAPPENS_BEFORE(thread);
5773 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5774 __kmp_release_64(&flag);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005775 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005776
Jonathan Peyton30419822017-05-12 18:01:32 +00005777 // Terminate OS thread.
5778 __kmp_reap_worker(thread);
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005779
Jonathan Peyton30419822017-05-12 18:01:32 +00005780 // The thread was killed asynchronously. If it was actively
5781 // spinning in the thread pool, decrement the global count.
5782 //
5783 // There is a small timing hole here - if the worker thread was just waking
5784 // up after sleeping in the pool, had reset its th_active_in_pool flag but
5785 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
5786 // the global counter might not get updated.
5787 //
5788 // Currently, this can only happen as the library is unloaded,
5789 // so there are no harmful side effects.
5790 if (thread->th.th_active_in_pool) {
5791 thread->th.th_active_in_pool = FALSE;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005792 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5793 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
Jonathan Peyton30419822017-05-12 18:01:32 +00005794 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005795
Jonathan Peyton30419822017-05-12 18:01:32 +00005796 // Decrement # of [worker] threads in the pool.
5797 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5798 --__kmp_thread_pool_nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005799 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005800
Jonathan Peyton30419822017-05-12 18:01:32 +00005801 __kmp_free_implicit_task(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005802
Jonathan Peyton30419822017-05-12 18:01:32 +00005803// Free the fast memory for tasking
5804#if USE_FAST_MEMORY
5805 __kmp_free_fast_memory(thread);
5806#endif /* USE_FAST_MEMORY */
5807
5808 __kmp_suspend_uninitialize_thread(thread);
5809
5810 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5811 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5812
5813 --__kmp_all_nth;
5814// __kmp_nth was decremented when thread is added to the pool.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005815
5816#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005817 /* Adjust blocktime back to user setting or default if necessary */
5818 /* Middle initialization might never have occurred */
5819 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5820 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5821 if (__kmp_nth <= __kmp_avail_proc) {
5822 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005823 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005824 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005825#endif /* KMP_ADJUST_BLOCKTIME */
5826
Jonathan Peyton30419822017-05-12 18:01:32 +00005827 /* free the memory being used */
5828 if (__kmp_env_consistency_check) {
5829 if (thread->th.th_cons) {
5830 __kmp_free_cons_stack(thread->th.th_cons);
5831 thread->th.th_cons = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005832 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005833 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005834
Jonathan Peyton30419822017-05-12 18:01:32 +00005835 if (thread->th.th_pri_common != NULL) {
5836 __kmp_free(thread->th.th_pri_common);
5837 thread->th.th_pri_common = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005838 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005839
Jonathan Peyton30419822017-05-12 18:01:32 +00005840 if (thread->th.th_task_state_memo_stack != NULL) {
5841 __kmp_free(thread->th.th_task_state_memo_stack);
5842 thread->th.th_task_state_memo_stack = NULL;
5843 }
5844
5845#if KMP_USE_BGET
5846 if (thread->th.th_local.bget_data != NULL) {
5847 __kmp_finalize_bget(thread);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005848 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005849#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005850
Alp Toker98758b02014-03-02 04:12:06 +00005851#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005852 if (thread->th.th_affin_mask != NULL) {
5853 KMP_CPU_FREE(thread->th.th_affin_mask);
5854 thread->th.th_affin_mask = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005855 }
Alp Toker98758b02014-03-02 04:12:06 +00005856#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005857
Jonathan Peyton30419822017-05-12 18:01:32 +00005858 __kmp_reap_team(thread->th.th_serial_team);
5859 thread->th.th_serial_team = NULL;
5860 __kmp_free(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005861
Jonathan Peyton30419822017-05-12 18:01:32 +00005862 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005863
5864} // __kmp_reap_thread
5865
Jonathan Peyton30419822017-05-12 18:01:32 +00005866static void __kmp_internal_end(void) {
5867 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005868
Jonathan Peyton30419822017-05-12 18:01:32 +00005869 /* First, unregister the library */
5870 __kmp_unregister_library();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005871
Jonathan Peyton30419822017-05-12 18:01:32 +00005872#if KMP_OS_WINDOWS
5873 /* In Win static library, we can't tell when a root actually dies, so we
5874 reclaim the data structures for any root threads that have died but not
5875 unregistered themselves, in order to shut down cleanly.
5876 In Win dynamic library we also can't tell when a thread dies. */
5877 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
5878// dead roots
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005879#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005880
Jonathan Peyton30419822017-05-12 18:01:32 +00005881 for (i = 0; i < __kmp_threads_capacity; i++)
5882 if (__kmp_root[i])
5883 if (__kmp_root[i]->r.r_active)
5884 break;
5885 KMP_MB(); /* Flush all pending memory write invalidates. */
5886 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5887
5888 if (i < __kmp_threads_capacity) {
5889#if KMP_USE_MONITOR
5890 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5891 KMP_MB(); /* Flush all pending memory write invalidates. */
5892
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005893 // Need to check that monitor was initialized before reaping it. If we are
5894 // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
5895 // __kmp_monitor will appear to contain valid data, but it is only valid in
5896 // the parent process, not the child.
Jonathan Peyton30419822017-05-12 18:01:32 +00005897 // New behavior (201008): instead of keying off of the flag
5898 // __kmp_init_parallel, the monitor thread creation is keyed off
5899 // of the new flag __kmp_init_monitor.
5900 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5901 if (TCR_4(__kmp_init_monitor)) {
5902 __kmp_reap_monitor(&__kmp_monitor);
5903 TCW_4(__kmp_init_monitor, 0);
5904 }
5905 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5906 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5907#endif // KMP_USE_MONITOR
5908 } else {
5909/* TODO move this to cleanup code */
5910#ifdef KMP_DEBUG
5911 /* make sure that everything has properly ended */
5912 for (i = 0; i < __kmp_threads_capacity; i++) {
5913 if (__kmp_root[i]) {
5914 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
5915 // there can be uber threads alive here
5916 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
5917 }
5918 }
5919#endif
5920
5921 KMP_MB();
5922
5923 // Reap the worker threads.
5924 // This is valid for now, but be careful if threads are reaped sooner.
5925 while (__kmp_thread_pool != NULL) { // Loop thru all the threads in the pool.
5926 // Get the next thread from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005927 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005928 __kmp_thread_pool = thread->th.th_next_pool;
5929 // Reap it.
5930 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5931 thread->th.th_next_pool = NULL;
5932 thread->th.th_in_pool = FALSE;
5933 __kmp_reap_thread(thread, 0);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005934 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005935 __kmp_thread_pool_insert_pt = NULL;
5936
5937 // Reap teams.
5938 while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
5939 // Get the next team from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005940 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005941 __kmp_team_pool = team->t.t_next_pool;
5942 // Reap it.
5943 team->t.t_next_pool = NULL;
5944 __kmp_reap_team(team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005945 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005946
5947 __kmp_reap_task_teams();
5948
5949 for (i = 0; i < __kmp_threads_capacity; ++i) {
5950 // TBD: Add some checking...
5951 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5952 }
5953
5954 /* Make sure all threadprivate destructors get run by joining with all
5955 worker threads before resetting this flag */
5956 TCW_SYNC_4(__kmp_init_common, FALSE);
5957
5958 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
5959 KMP_MB();
5960
5961#if KMP_USE_MONITOR
5962 // See note above: One of the possible fixes for CQ138434 / CQ140126
5963 //
5964 // FIXME: push both code fragments down and CSE them?
5965 // push them into __kmp_cleanup() ?
5966 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5967 if (TCR_4(__kmp_init_monitor)) {
5968 __kmp_reap_monitor(&__kmp_monitor);
5969 TCW_4(__kmp_init_monitor, 0);
5970 }
5971 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5972 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5973#endif
5974 } /* else !__kmp_global.t_active */
5975 TCW_4(__kmp_init_gtid, FALSE);
5976 KMP_MB(); /* Flush all pending memory write invalidates. */
5977
5978 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005979#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00005980 ompt_fini();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005981#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005982}
5983
Jonathan Peyton30419822017-05-12 18:01:32 +00005984void __kmp_internal_end_library(int gtid_req) {
5985 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5986 /* this shouldn't be a race condition because __kmp_internal_end() is the
5987 only place to clear __kmp_serial_init */
5988 /* we'll check this later too, after we get the lock */
5989 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
5990 // redundant, because the next check will work in any case.
5991 if (__kmp_global.g.g_abort) {
5992 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
5993 /* TODO abort? */
5994 return;
5995 }
5996 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
5997 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
5998 return;
5999 }
6000
6001 KMP_MB(); /* Flush all pending memory write invalidates. */
6002
6003 /* find out who we are and what we should do */
6004 {
6005 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6006 KA_TRACE(
6007 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6008 if (gtid == KMP_GTID_SHUTDOWN) {
6009 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6010 "already shutdown\n"));
6011 return;
6012 } else if (gtid == KMP_GTID_MONITOR) {
6013 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6014 "registered, or system shutdown\n"));
6015 return;
6016 } else if (gtid == KMP_GTID_DNE) {
6017 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6018 "shutdown\n"));
6019 /* we don't know who we are, but we may still shut down the library */
6020 } else if (KMP_UBER_GTID(gtid)) {
6021 /* unregister ourselves as an uber thread. gtid is no longer valid */
6022 if (__kmp_root[gtid]->r.r_active) {
6023 __kmp_global.g.g_abort = -1;
6024 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6025 KA_TRACE(10,
6026 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6027 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006028 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006029 } else {
6030 KA_TRACE(
6031 10,
6032 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6033 __kmp_unregister_root_current_thread(gtid);
6034 }
6035 } else {
6036/* worker threads may call this function through the atexit handler, if they
6037 * call exit() */
6038/* For now, skip the usual subsequent processing and just dump the debug buffer.
6039 TODO: do a thorough shutdown instead */
6040#ifdef DUMP_DEBUG_ON_EXIT
6041 if (__kmp_debug_buf)
6042 __kmp_dump_debug_buffer();
6043#endif
6044 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006045 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006046 }
6047 /* synchronize the termination process */
6048 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006049
Jonathan Peyton30419822017-05-12 18:01:32 +00006050 /* have we already finished */
6051 if (__kmp_global.g.g_abort) {
6052 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6053 /* TODO abort? */
6054 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6055 return;
6056 }
6057 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6058 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6059 return;
6060 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006061
Jonathan Peyton30419822017-05-12 18:01:32 +00006062 /* We need this lock to enforce mutex between this reading of
6063 __kmp_threads_capacity and the writing by __kmp_register_root.
6064 Alternatively, we can use a counter of roots that is atomically updated by
6065 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6066 __kmp_internal_end_*. */
6067 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006068
Jonathan Peyton30419822017-05-12 18:01:32 +00006069 /* now we can safely conduct the actual termination */
6070 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006071
Jonathan Peyton30419822017-05-12 18:01:32 +00006072 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6073 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006074
Jonathan Peyton30419822017-05-12 18:01:32 +00006075 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006076
Jonathan Peyton30419822017-05-12 18:01:32 +00006077#ifdef DUMP_DEBUG_ON_EXIT
6078 if (__kmp_debug_buf)
6079 __kmp_dump_debug_buffer();
6080#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006081
Jonathan Peyton30419822017-05-12 18:01:32 +00006082#if KMP_OS_WINDOWS
6083 __kmp_close_console();
6084#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006085
Jonathan Peyton30419822017-05-12 18:01:32 +00006086 __kmp_fini_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006087
6088} // __kmp_internal_end_library
6089
Jonathan Peyton30419822017-05-12 18:01:32 +00006090void __kmp_internal_end_thread(int gtid_req) {
6091 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006092
Jonathan Peyton30419822017-05-12 18:01:32 +00006093 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6094 /* this shouldn't be a race condition because __kmp_internal_end() is the
6095 * only place to clear __kmp_serial_init */
6096 /* we'll check this later too, after we get the lock */
6097 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6098 // redundant, because the next check will work in any case.
6099 if (__kmp_global.g.g_abort) {
6100 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6101 /* TODO abort? */
6102 return;
6103 }
6104 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6105 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6106 return;
6107 }
6108
6109 KMP_MB(); /* Flush all pending memory write invalidates. */
6110
6111 /* find out who we are and what we should do */
6112 {
6113 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6114 KA_TRACE(10,
6115 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6116 if (gtid == KMP_GTID_SHUTDOWN) {
6117 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6118 "already shutdown\n"));
6119 return;
6120 } else if (gtid == KMP_GTID_MONITOR) {
6121 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6122 "registered, or system shutdown\n"));
6123 return;
6124 } else if (gtid == KMP_GTID_DNE) {
6125 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6126 "shutdown\n"));
6127 return;
6128 /* we don't know who we are */
6129 } else if (KMP_UBER_GTID(gtid)) {
6130 /* unregister ourselves as an uber thread. gtid is no longer valid */
6131 if (__kmp_root[gtid]->r.r_active) {
6132 __kmp_global.g.g_abort = -1;
6133 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6134 KA_TRACE(10,
6135 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6136 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006137 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006138 } else {
6139 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6140 gtid));
6141 __kmp_unregister_root_current_thread(gtid);
6142 }
6143 } else {
6144 /* just a worker thread, let's leave */
6145 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6146
6147 if (gtid >= 0) {
6148 __kmp_threads[gtid]->th.th_task_team = NULL;
6149 }
6150
6151 KA_TRACE(10,
6152 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6153 gtid));
6154 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006155 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006156 }
6157#if defined KMP_DYNAMIC_LIB
6158 // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber
6159 // thread, because it is better to shut down later, in the library destructor.
6160 // The reason for this change is a performance problem when a non-OpenMP thread
6161 // in a loop forks and joins many OpenMP threads. We can save a lot of time by
6162 // keeping worker threads alive until the program shutdown.
6163 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
6164 // and Windows(DPD200287443) that occurs when using critical sections from
6165 // foreign threads.
6166 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6167 return;
6168#endif
6169 /* synchronize the termination process */
6170 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006171
Jonathan Peyton30419822017-05-12 18:01:32 +00006172 /* have we already finished */
6173 if (__kmp_global.g.g_abort) {
6174 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6175 /* TODO abort? */
6176 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6177 return;
6178 }
6179 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6180 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6181 return;
6182 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006183
Jonathan Peyton30419822017-05-12 18:01:32 +00006184 /* We need this lock to enforce mutex between this reading of
6185 __kmp_threads_capacity and the writing by __kmp_register_root.
6186 Alternatively, we can use a counter of roots that is atomically updated by
6187 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6188 __kmp_internal_end_*. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006189
Jonathan Peyton30419822017-05-12 18:01:32 +00006190 /* should we finish the run-time? are all siblings done? */
6191 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006192
Jonathan Peyton30419822017-05-12 18:01:32 +00006193 for (i = 0; i < __kmp_threads_capacity; ++i) {
6194 if (KMP_UBER_GTID(i)) {
6195 KA_TRACE(
6196 10,
6197 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6198 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6199 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6200 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006201 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006202 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006203
Jonathan Peyton30419822017-05-12 18:01:32 +00006204 /* now we can safely conduct the actual termination */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006205
Jonathan Peyton30419822017-05-12 18:01:32 +00006206 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006207
Jonathan Peyton30419822017-05-12 18:01:32 +00006208 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6209 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006210
Jonathan Peyton30419822017-05-12 18:01:32 +00006211 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006212
Jonathan Peyton30419822017-05-12 18:01:32 +00006213#ifdef DUMP_DEBUG_ON_EXIT
6214 if (__kmp_debug_buf)
6215 __kmp_dump_debug_buffer();
6216#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006217} // __kmp_internal_end_thread
6218
Jonathan Peyton30419822017-05-12 18:01:32 +00006219// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006220// Library registration stuff.
6221
Jonathan Peyton30419822017-05-12 18:01:32 +00006222static long __kmp_registration_flag = 0;
6223// Random value used to indicate library initialization.
6224static char *__kmp_registration_str = NULL;
6225// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006226
Jonathan Peyton30419822017-05-12 18:01:32 +00006227static inline char *__kmp_reg_status_name() {
6228 /* On RHEL 3u5 if linked statically, getpid() returns different values in
6229 each thread. If registration and unregistration go in different threads
6230 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6231 env var cannot be found, because the name will contain a different pid. */
6232 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00006233} // __kmp_reg_status_get
6234
Jonathan Peyton30419822017-05-12 18:01:32 +00006235void __kmp_register_library_startup(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006236
Jonathan Peyton30419822017-05-12 18:01:32 +00006237 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6238 int done = 0;
6239 union {
6240 double dtime;
6241 long ltime;
6242 } time;
6243#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6244 __kmp_initialize_system_tick();
6245#endif
6246 __kmp_read_system_time(&time.dtime);
6247 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6248 __kmp_registration_str =
6249 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6250 __kmp_registration_flag, KMP_LIBRARY_FILE);
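  // Illustrative (hypothetical values): the registration env var set below
  // might end up looking like
  //   __KMP_REGISTERED_LIB_12345=0x7f0123456780-cafe1a2b-libomp.so
  // i.e. <address of __kmp_registration_flag>-<flag value>-<library file>,
  // which the liveness check in the loop below parses back with
  // __kmp_str_split().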
Jim Cownie5e8470a2013-09-27 10:38:44 +00006251
Jonathan Peyton30419822017-05-12 18:01:32 +00006252 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6253 __kmp_registration_str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006254
Jonathan Peyton30419822017-05-12 18:01:32 +00006255 while (!done) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006256
Jonathan Peyton30419822017-05-12 18:01:32 +00006257 char *value = NULL; // Actual value of the environment variable.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006258
Jonathan Peyton30419822017-05-12 18:01:32 +00006259 // Set environment variable, but do not overwrite it if it already exists.
6260 __kmp_env_set(name, __kmp_registration_str, 0);
6261 // Check that the variable was actually written.
6262 value = __kmp_env_get(name);
6263 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006264
Jonathan Peyton30419822017-05-12 18:01:32 +00006265 done = 1; // Ok, environment variable set successfully, exit the loop.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006266
Jonathan Peyton30419822017-05-12 18:01:32 +00006267 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006268
Jonathan Peyton30419822017-05-12 18:01:32 +00006269 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6270 // Check whether it is alive or dead.
6271 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6272 char *tail = value;
6273 char *flag_addr_str = NULL;
6274 char *flag_val_str = NULL;
6275 char const *file_name = NULL;
6276 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6277 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6278 file_name = tail;
6279 if (tail != NULL) {
6280 long *flag_addr = 0;
6281 long flag_val = 0;
6282 KMP_SSCANF(flag_addr_str, "%p", &flag_addr);
6283 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6284 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6285 // First, check whether the environment-encoded address is mapped into
6286 // the address space.
6287 // If so, dereference it to see if it still has the right value.
6288 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6289 neighbor = 1;
6290 } else {
6291 // If not, then we know the other copy of the library is no longer
6292 // running.
6293 neighbor = 2;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006294 }
6295 }
6296 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006297 switch (neighbor) {
6298 case 0: // Cannot parse environment variable -- neighbor status unknown.
6299 // Assume it is the incompatible format of a future version of the
6300 // library. Assume the other library is alive.
6301 // WARN( ... ); // TODO: Issue a warning.
6302 file_name = "unknown library";
6303 // Attention! Falling through to the next case. That's intentional.
6304 case 1: { // Neighbor is alive.
6305 // Check it is allowed.
6306 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6307 if (!__kmp_str_match_true(duplicate_ok)) {
6308 // That's not allowed. Issue fatal error.
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006309 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6310 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006311 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006312 KMP_INTERNAL_FREE(duplicate_ok);
6313 __kmp_duplicate_library_ok = 1;
6314 done = 1; // Exit the loop.
6315 } break;
6316 case 2: { // Neighbor is dead.
6317 // Clear the variable and try to register library again.
6318 __kmp_env_unset(name);
6319 } break;
6320 default: { KMP_DEBUG_ASSERT(0); } break;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006321 }
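      // Usage note: the fatal error in case 1 above can be avoided, allowing
      // both copies of the runtime to coexist, by setting
      // KMP_DUPLICATE_LIB_OK=TRUE (or any value accepted by
      // __kmp_str_match_true) in the environment.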
6322 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006323 KMP_INTERNAL_FREE((void *)value);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006324 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006325 KMP_INTERNAL_FREE((void *)name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006326
6327} // func __kmp_register_library_startup
6328
Jonathan Peyton30419822017-05-12 18:01:32 +00006329void __kmp_unregister_library(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006330
Jonathan Peyton30419822017-05-12 18:01:32 +00006331 char *name = __kmp_reg_status_name();
6332 char *value = __kmp_env_get(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006333
Jonathan Peyton30419822017-05-12 18:01:32 +00006334 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6335 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6336 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6337 // Ok, this is our variable. Delete it.
6338 __kmp_env_unset(name);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006339 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006340
Jonathan Peyton30419822017-05-12 18:01:32 +00006341 KMP_INTERNAL_FREE(__kmp_registration_str);
6342 KMP_INTERNAL_FREE(value);
6343 KMP_INTERNAL_FREE(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006344
Jonathan Peyton30419822017-05-12 18:01:32 +00006345 __kmp_registration_flag = 0;
6346 __kmp_registration_str = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006347
6348} // __kmp_unregister_library
6349
Jim Cownie5e8470a2013-09-27 10:38:44 +00006350// End of Library registration stuff.
Jonathan Peyton30419822017-05-12 18:01:32 +00006351// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006352
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006353#if KMP_MIC_SUPPORTED
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006354
Jonathan Peyton30419822017-05-12 18:01:32 +00006355static void __kmp_check_mic_type() {
6356 kmp_cpuid_t cpuid_state = {0};
6357 kmp_cpuid_t *cs_p = &cpuid_state;
6358 __kmp_x86_cpuid(1, 0, cs_p);
6359 // We don't support mic1 at the moment
6360 if ((cs_p->eax & 0xff0) == 0xB10) {
6361 __kmp_mic_type = mic2;
6362 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6363 __kmp_mic_type = mic3;
6364 } else {
6365 __kmp_mic_type = non_mic;
6366 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006367}
6368
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006369#endif /* KMP_MIC_SUPPORTED */
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006370
Jonathan Peyton30419822017-05-12 18:01:32 +00006371static void __kmp_do_serial_initialize(void) {
6372 int i, gtid;
6373 int size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006374
Jonathan Peyton30419822017-05-12 18:01:32 +00006375 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006376
Jonathan Peyton30419822017-05-12 18:01:32 +00006377 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6378 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6379 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6380 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6381 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006382
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006383#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006384 ompt_pre_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006385#endif
6386
Jonathan Peyton30419822017-05-12 18:01:32 +00006387 __kmp_validate_locks();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006388
Jonathan Peyton30419822017-05-12 18:01:32 +00006389 /* Initialize internal memory allocator */
6390 __kmp_init_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006391
Jonathan Peyton30419822017-05-12 18:01:32 +00006392 /* Register the library startup via an environment variable and check to see
6393 whether another copy of the library is already registered. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006394
Jonathan Peyton30419822017-05-12 18:01:32 +00006395 __kmp_register_library_startup();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006396
Jonathan Peyton30419822017-05-12 18:01:32 +00006397 /* TODO reinitialization of library */
6398 if (TCR_4(__kmp_global.g.g_done)) {
6399 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6400 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006401
Jonathan Peyton30419822017-05-12 18:01:32 +00006402 __kmp_global.g.g_abort = 0;
6403 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006404
Jonathan Peyton30419822017-05-12 18:01:32 +00006405/* initialize the locks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006406#if KMP_USE_ADAPTIVE_LOCKS
6407#if KMP_DEBUG_ADAPTIVE_LOCKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006408 __kmp_init_speculative_stats();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006409#endif
6410#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006411#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006412 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006413#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006414 __kmp_init_lock(&__kmp_global_lock);
6415 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6416 __kmp_init_lock(&__kmp_debug_lock);
6417 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6418 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6419 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6420 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6421 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6422 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6423 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6424 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6425 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6426 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6427 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6428 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6429 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6430 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6431 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006432#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006433 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006434#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006435 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006436
Jonathan Peyton30419822017-05-12 18:01:32 +00006437 /* conduct initialization and initial setup of configuration */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006438
Jonathan Peyton30419822017-05-12 18:01:32 +00006439 __kmp_runtime_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006440
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006441#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006442 __kmp_check_mic_type();
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006443#endif
6444
Jonathan Peyton30419822017-05-12 18:01:32 +00006445// Some global variable initialization moved here from kmp_env_initialize()
Jim Cownie5e8470a2013-09-27 10:38:44 +00006446#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006447 kmp_diag = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006448#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006449 __kmp_abort_delay = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006450
Jonathan Peyton30419822017-05-12 18:01:32 +00006451 // From __kmp_init_dflt_team_nth()
6452 /* assume the entire machine will be used */
6453 __kmp_dflt_team_nth_ub = __kmp_xproc;
6454 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6455 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6456 }
6457 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6458 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6459 }
6460 __kmp_max_nth = __kmp_sys_max_nth;
Jonathan Peytonf4392462017-07-27 20:58:41 +00006461 __kmp_cg_max_nth = __kmp_sys_max_nth;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00006462 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
6463 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6464 __kmp_teams_max_nth = __kmp_sys_max_nth;
6465 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006466
Jonathan Peyton30419822017-05-12 18:01:32 +00006467 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
6468 // part
6469 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006470#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006471 __kmp_monitor_wakeups =
6472 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6473 __kmp_bt_intervals =
6474 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006475#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006476 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6477 __kmp_library = library_throughput;
6478 // From KMP_SCHEDULE initialization
6479 __kmp_static = kmp_sch_static_balanced;
6480// AC: do not use analytical here, because it is non-monotonous
6481//__kmp_guided = kmp_sch_guided_iterative_chunked;
6482//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
6483// need to repeat assignment
6484// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
6485// bit control and barrier method control parts
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006486#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton30419822017-05-12 18:01:32 +00006487#define kmp_reduction_barrier_gather_bb ((int)1)
6488#define kmp_reduction_barrier_release_bb ((int)1)
6489#define kmp_reduction_barrier_gather_pat bp_hyper_bar
6490#define kmp_reduction_barrier_release_pat bp_hyper_bar
6491#endif // KMP_FAST_REDUCTION_BARRIER
6492 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6493 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6494 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6495 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6496 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6497#if KMP_FAST_REDUCTION_BARRIER
6498 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
6499 // lin_64 ): hyper,1
6500 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6501 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6502 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6503 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006504 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006505#endif // KMP_FAST_REDUCTION_BARRIER
6506 }
6507#if KMP_FAST_REDUCTION_BARRIER
6508#undef kmp_reduction_barrier_release_pat
6509#undef kmp_reduction_barrier_gather_pat
6510#undef kmp_reduction_barrier_release_bb
6511#undef kmp_reduction_barrier_gather_bb
6512#endif // KMP_FAST_REDUCTION_BARRIER
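// Note (assumption about the barrier code, for illustration): a branch-bits
// value of b corresponds to a branching factor of 2^b in the tree/hyper
// barriers, so the reduction-barrier default of 1 above yields a fan-out of 2
// per level.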
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006513#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006514 if (__kmp_mic_type == mic2) { // KNC
6515 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
6516 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
6517 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6518 1; // forkjoin release
6519 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6520 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6521 }
6522#if KMP_FAST_REDUCTION_BARRIER
6523 if (__kmp_mic_type == mic2) { // KNC
6524 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6525 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6526 }
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006527#endif // KMP_FAST_REDUCTION_BARRIER
6528#endif // KMP_MIC_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006529
Jonathan Peyton30419822017-05-12 18:01:32 +00006530// From KMP_CHECKS initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00006531#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006532 __kmp_env_checks = TRUE; /* development versions have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006533#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006534 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006535#endif
6536
Jonathan Peyton30419822017-05-12 18:01:32 +00006537 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6538 __kmp_foreign_tp = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006539
Jonathan Peyton30419822017-05-12 18:01:32 +00006540 __kmp_global.g.g_dynamic = FALSE;
6541 __kmp_global.g.g_dynamic_mode = dynamic_default;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006542
Jonathan Peyton30419822017-05-12 18:01:32 +00006543 __kmp_env_initialize(NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006544
Jonathan Peyton30419822017-05-12 18:01:32 +00006545// Print all messages in message catalog for testing purposes.
6546#ifdef KMP_DEBUG
6547 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6548 if (__kmp_str_match_true(val)) {
6549 kmp_str_buf_t buffer;
6550 __kmp_str_buf_init(&buffer);
6551 __kmp_i18n_dump_catalog(&buffer);
6552 __kmp_printf("%s", buffer.str);
6553 __kmp_str_buf_free(&buffer);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006554 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006555 __kmp_env_free(&val);
6556#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006557
Jonathan Peyton30419822017-05-12 18:01:32 +00006558 __kmp_threads_capacity =
6559 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6560 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6561 __kmp_tp_capacity = __kmp_default_tp_capacity(
6562 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006563
Jonathan Peyton30419822017-05-12 18:01:32 +00006564 // If the library is shut down properly, both pools must be NULL. Just in
6565 // case, set them to NULL -- some memory may leak, but subsequent code will
6566 // work even if pools are not freed.
6567 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6568 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6569 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6570 __kmp_thread_pool = NULL;
6571 __kmp_thread_pool_insert_pt = NULL;
6572 __kmp_team_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006573
Jonathan Peyton30419822017-05-12 18:01:32 +00006574 /* Allocate all of the variable sized records */
6575 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
6576 * expandable */
6577 /* Since allocation is cache-aligned, just add extra padding at the end */
6578 size =
6579 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6580 CACHE_LINE;
6581 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6582 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6583 sizeof(kmp_info_t *) * __kmp_threads_capacity);
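  // Layout of the single cache-aligned allocation made above (sketch):
  //   [ kmp_info_t * x __kmp_threads_capacity | kmp_root_t * x capacity | pad ]
  //   ^ __kmp_threads                           ^ __kmp_root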
Jim Cownie5e8470a2013-09-27 10:38:44 +00006584
Jonathan Peyton30419822017-05-12 18:01:32 +00006585 /* init thread counts */
6586 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6587 0); // Asserts fail if the library is reinitializing and
6588 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
6589 __kmp_all_nth = 0;
6590 __kmp_nth = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006591
Jonathan Peyton30419822017-05-12 18:01:32 +00006592 /* setup the uber master thread and hierarchy */
6593 gtid = __kmp_register_root(TRUE);
6594 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6595 KMP_ASSERT(KMP_UBER_GTID(gtid));
6596 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006597
Jonathan Peyton30419822017-05-12 18:01:32 +00006598 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006599
Jonathan Peyton30419822017-05-12 18:01:32 +00006600 __kmp_common_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006601
Jonathan Peyton30419822017-05-12 18:01:32 +00006602#if KMP_OS_UNIX
6603 /* invoke the child fork handler */
6604 __kmp_register_atfork();
6605#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006606
Jonathan Peyton30419822017-05-12 18:01:32 +00006607#if !defined KMP_DYNAMIC_LIB
6608 {
6609 /* Invoke the exit handler when the program finishes, only for static
6610 library. For dynamic library, we already have _fini and DllMain. */
6611 int rc = atexit(__kmp_internal_end_atexit);
6612 if (rc != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006613 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6614 __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006615 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006616 }
6617#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006618
Jonathan Peyton30419822017-05-12 18:01:32 +00006619#if KMP_HANDLE_SIGNALS
6620#if KMP_OS_UNIX
6621 /* NOTE: make sure that this is called before the user installs their own
6622 signal handlers so that the user handlers are called first. This way they
6623 can return false, not call our handler, avoid terminating the library, and
6624 continue execution where they left off. */
6625 __kmp_install_signals(FALSE);
6626#endif /* KMP_OS_UNIX */
6627#if KMP_OS_WINDOWS
6628 __kmp_install_signals(TRUE);
6629#endif /* KMP_OS_WINDOWS */
6630#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006631
Jonathan Peyton30419822017-05-12 18:01:32 +00006632 /* we have finished the serial initialization */
6633 __kmp_init_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006634
Jonathan Peyton30419822017-05-12 18:01:32 +00006635 __kmp_init_serial = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006636
Jonathan Peyton30419822017-05-12 18:01:32 +00006637 if (__kmp_settings) {
6638 __kmp_env_print();
6639 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006640
6641#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006642 if (__kmp_display_env || __kmp_display_env_verbose) {
6643 __kmp_env_print_2();
6644 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006645#endif // OMP_40_ENABLED
6646
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006647#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006648 ompt_post_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006649#endif
6650
Jonathan Peyton30419822017-05-12 18:01:32 +00006651 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006652
Jonathan Peyton30419822017-05-12 18:01:32 +00006653 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006654}
6655
Jonathan Peyton30419822017-05-12 18:01:32 +00006656void __kmp_serial_initialize(void) {
6657 if (__kmp_init_serial) {
6658 return;
6659 }
6660 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6661 if (__kmp_init_serial) {
6662 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6663 return;
6664 }
6665 __kmp_do_serial_initialize();
6666 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6667}
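// Note: __kmp_serial_initialize(), __kmp_middle_initialize() and
// __kmp_parallel_initialize() all follow the same pattern: an unlocked
// fast-path check, then acquire __kmp_initz_lock and re-check, so concurrent
// first calls perform the initialization only once.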
6668
6669static void __kmp_do_middle_initialize(void) {
6670 int i, j;
6671 int prev_dflt_team_nth;
6672
6673 if (!__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006674 __kmp_do_serial_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006675 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006676
Jonathan Peyton30419822017-05-12 18:01:32 +00006677 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006678
Jonathan Peyton30419822017-05-12 18:01:32 +00006679 // Save the previous value for the __kmp_dflt_team_nth so that
6680 // we can avoid some reinitialization if it hasn't changed.
6681 prev_dflt_team_nth = __kmp_dflt_team_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006682
Alp Toker98758b02014-03-02 04:12:06 +00006683#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006684 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6685 // number of cores on the machine.
6686 __kmp_affinity_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006687
Jonathan Peyton30419822017-05-12 18:01:32 +00006688 // Run through the __kmp_threads array and set the affinity mask
6689 // for each root thread that is currently registered with the RTL.
6690 for (i = 0; i < __kmp_threads_capacity; i++) {
6691 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6692 __kmp_affinity_set_init_mask(i, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006693 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006694 }
Alp Toker98758b02014-03-02 04:12:06 +00006695#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006696
Jonathan Peyton30419822017-05-12 18:01:32 +00006697 KMP_ASSERT(__kmp_xproc > 0);
6698 if (__kmp_avail_proc == 0) {
6699 __kmp_avail_proc = __kmp_xproc;
6700 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006701
Jonathan Peyton30419822017-05-12 18:01:32 +00006702 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
6703 // correct them now
6704 j = 0;
6705 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6706 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6707 __kmp_avail_proc;
6708 j++;
6709 }
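  // For example (illustrative): with OMP_NUM_THREADS=",,2,3" and 8 available
  // procs, the two leading empty slots become 8, giving the list 8,8,2,3, and
  // __kmp_dflt_team_nth / __kmp_dflt_team_nth_ub are set to 8 as well.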
Jim Cownie5e8470a2013-09-27 10:38:44 +00006710
Jonathan Peyton30419822017-05-12 18:01:32 +00006711 if (__kmp_dflt_team_nth == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006712#ifdef KMP_DFLT_NTH_CORES
Jonathan Peyton30419822017-05-12 18:01:32 +00006713 // Default #threads = #cores
6714 __kmp_dflt_team_nth = __kmp_ncores;
6715 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6716 "__kmp_ncores (%d)\n",
6717 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006718#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006719 // Default #threads = #available OS procs
6720 __kmp_dflt_team_nth = __kmp_avail_proc;
6721 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6722 "__kmp_avail_proc(%d)\n",
6723 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006724#endif /* KMP_DFLT_NTH_CORES */
Jonathan Peyton30419822017-05-12 18:01:32 +00006725 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006726
Jonathan Peyton30419822017-05-12 18:01:32 +00006727 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6728 __kmp_dflt_team_nth = KMP_MIN_NTH;
6729 }
6730 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6731 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6732 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006733
Jonathan Peyton30419822017-05-12 18:01:32 +00006734 // There's no harm in continuing if the following check fails,
6735 // but it indicates an error in the previous logic.
6736 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006737
Jonathan Peyton30419822017-05-12 18:01:32 +00006738 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6739 // Run through the __kmp_threads array and set the num threads icv for each
6740 // root thread that is currently registered with the RTL (which has not
6741 // already explicitly set its nthreads-var with a call to
6742 // omp_set_num_threads()).
6743 for (i = 0; i < __kmp_threads_capacity; i++) {
6744 kmp_info_t *thread = __kmp_threads[i];
6745 if (thread == NULL)
6746 continue;
6747 if (thread->th.th_current_task->td_icvs.nproc != 0)
6748 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006749
Jonathan Peyton30419822017-05-12 18:01:32 +00006750 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006751 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006752 }
6753 KA_TRACE(
6754 20,
6755 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6756 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006757
6758#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00006759 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
6760 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6761 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6762 if (__kmp_nth > __kmp_avail_proc) {
6763 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006764 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006765 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006766#endif /* KMP_ADJUST_BLOCKTIME */
6767
Jonathan Peyton30419822017-05-12 18:01:32 +00006768 /* we have finished middle initialization */
6769 TCW_SYNC_4(__kmp_init_middle, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006770
Jonathan Peyton30419822017-05-12 18:01:32 +00006771 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006772}
6773
Jonathan Peyton30419822017-05-12 18:01:32 +00006774void __kmp_middle_initialize(void) {
6775 if (__kmp_init_middle) {
6776 return;
6777 }
6778 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6779 if (__kmp_init_middle) {
6780 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6781 return;
6782 }
6783 __kmp_do_middle_initialize();
6784 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6785}
6786
6787void __kmp_parallel_initialize(void) {
6788 int gtid = __kmp_entry_gtid(); // this might be a new root
6789
6790 /* synchronize parallel initialization (for sibling) */
6791 if (TCR_4(__kmp_init_parallel))
6792 return;
6793 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6794 if (TCR_4(__kmp_init_parallel)) {
6795 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6796 return;
6797 }
6798
6799 /* TODO reinitialization after we have already shut down */
6800 if (TCR_4(__kmp_global.g.g_done)) {
6801 KA_TRACE(
6802 10,
6803 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
6804 __kmp_infinite_loop();
6805 }
6806
6807 /* jc: The lock __kmp_initz_lock is already held, so calling
6808 __kmp_serial_initialize would cause a deadlock. So we call
6809 __kmp_do_serial_initialize directly. */
6810 if (!__kmp_init_middle) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006811 __kmp_do_middle_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006812 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006813
Jonathan Peyton30419822017-05-12 18:01:32 +00006814 /* begin initialization */
6815 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
6816 KMP_ASSERT(KMP_UBER_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006817
6818#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00006819 // Save the FP control regs.
6820 // Worker threads will set theirs to these values at thread startup.
6821 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6822 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6823 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006824#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6825
6826#if KMP_OS_UNIX
Jonathan Peyton30419822017-05-12 18:01:32 +00006827#if KMP_HANDLE_SIGNALS
6828 /* must be after __kmp_serial_initialize */
6829 __kmp_install_signals(TRUE);
6830#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006831#endif
6832
Jonathan Peyton30419822017-05-12 18:01:32 +00006833 __kmp_suspend_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006834
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006835#if defined(USE_LOAD_BALANCE)
Jonathan Peyton30419822017-05-12 18:01:32 +00006836 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6837 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6838 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006839#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006840 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6841 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6842 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006843#endif
6844
Jonathan Peyton30419822017-05-12 18:01:32 +00006845 if (__kmp_version) {
6846 __kmp_print_version_2();
6847 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006848
Jonathan Peyton30419822017-05-12 18:01:32 +00006849 /* we have finished parallel initialization */
6850 TCW_SYNC_4(__kmp_init_parallel, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006851
Jonathan Peyton30419822017-05-12 18:01:32 +00006852 KMP_MB();
6853 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006854
Jonathan Peyton30419822017-05-12 18:01:32 +00006855 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006856}
6857
Jim Cownie5e8470a2013-09-27 10:38:44 +00006858/* ------------------------------------------------------------------------ */
6859
Jonathan Peyton30419822017-05-12 18:01:32 +00006860void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6861 kmp_team_t *team) {
6862 kmp_disp_t *dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006863
Jonathan Peyton30419822017-05-12 18:01:32 +00006864 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006865
Jonathan Peyton30419822017-05-12 18:01:32 +00006866 /* none of the threads have encountered any constructs, yet. */
6867 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006868#if KMP_CACHE_MANAGE
Jonathan Peyton30419822017-05-12 18:01:32 +00006869 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006870#endif /* KMP_CACHE_MANAGE */
Jonathan Peyton30419822017-05-12 18:01:32 +00006871 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6872 KMP_DEBUG_ASSERT(dispatch);
6873 KMP_DEBUG_ASSERT(team->t.t_dispatch);
6874 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
6875 // this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006876
Jonathan Peyton30419822017-05-12 18:01:32 +00006877 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006878#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006879 dispatch->th_doacross_buf_idx =
6880 0; /* reset the doacross dispatch buffer counter */
Jonathan Peyton71909c52016-03-02 22:42:06 +00006881#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006882 if (__kmp_env_consistency_check)
6883 __kmp_push_parallel(gtid, team->t.t_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006884
Jonathan Peyton30419822017-05-12 18:01:32 +00006885 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006886}
6887
Jonathan Peyton30419822017-05-12 18:01:32 +00006888void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6889 kmp_team_t *team) {
6890 if (__kmp_env_consistency_check)
6891 __kmp_pop_parallel(gtid, team->t.t_ident);
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006892
Jonathan Peyton30419822017-05-12 18:01:32 +00006893 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006894}
6895
Jonathan Peyton30419822017-05-12 18:01:32 +00006896int __kmp_invoke_task_func(int gtid) {
6897 int rc;
6898 int tid = __kmp_tid_from_gtid(gtid);
6899 kmp_info_t *this_thr = __kmp_threads[gtid];
6900 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006901
Jonathan Peyton30419822017-05-12 18:01:32 +00006902 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006903#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006904 if (__itt_stack_caller_create_ptr) {
6905 __kmp_itt_stack_callee_enter(
6906 (__itt_caller)
6907 team->t.t_stack_id); // inform ittnotify about entering user's code
6908 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006909#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006910#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006911 SSC_MARK_INVOKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006912#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006913
6914#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006915 void *dummy;
6916 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00006917 ompt_data_t *my_task_data;
6918 ompt_data_t *my_parallel_data;
6919 int ompt_team_size;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006920
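  // Point exit_runtime_p at the implicit task's exit frame so an OMPT tool can
  // see when the task leaves the runtime; with OMPT disabled it targets a
  // local dummy so __kmp_invoke_microtask can store through it unconditionally.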
Joachim Protze82e94a52017-11-01 10:08:30 +00006921 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00006922 exit_runtime_p = &(
6923 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame);
Jonathan Peyton30419822017-05-12 18:01:32 +00006924 } else {
6925 exit_runtime_p = &dummy;
6926 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006927
Joachim Protze82e94a52017-11-01 10:08:30 +00006928 my_task_data =
6929 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
6930 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
6931 if (ompt_enabled.ompt_callback_implicit_task) {
6932 ompt_team_size = team->t.t_nproc;
6933 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
6934 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
6935 __kmp_tid_from_gtid(gtid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00006936 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00006937 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006938#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006939
Jonathan Peyton30419822017-05-12 18:01:32 +00006940 {
6941 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6942 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6943 rc =
6944 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6945 tid, (int)team->t.t_argc, (void **)team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006946#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006947 ,
6948 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006949#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006950 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006951#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006952 *exit_runtime_p = NULL;
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006953#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006954 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006955
Jim Cownie5e8470a2013-09-27 10:38:44 +00006956#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006957 if (__itt_stack_caller_create_ptr) {
6958 __kmp_itt_stack_callee_leave(
6959 (__itt_caller)
6960 team->t.t_stack_id); // inform ittnotify about leaving user's code
6961 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006962#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00006963 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006964
Jonathan Peyton30419822017-05-12 18:01:32 +00006965 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006966}
6967
6968#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006969void __kmp_teams_master(int gtid) {
6970 // This routine is called by all master threads in teams construct
6971 kmp_info_t *thr = __kmp_threads[gtid];
6972 kmp_team_t *team = thr->th.th_team;
6973 ident_t *loc = team->t.t_ident;
6974 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6975 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
6976 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
6977 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
6978 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
6979// Launch league of teams now, but not let workers execute
6980// (they hang on fork barrier until next parallel)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006981#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006982 SSC_MARK_FORKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006983#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006984 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
Jonathan Peyton30419822017-05-12 18:01:32 +00006985 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
6986 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006987#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006988 SSC_MARK_JOINING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006989#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006990
Jonathan Peyton30419822017-05-12 18:01:32 +00006991 // AC: last parameter "1" eliminates join barrier which won't work because
6992 // worker threads are in a fork barrier waiting for more parallel regions
6993 __kmp_join_call(loc, gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006994#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006995 ,
6996 fork_context_intel
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006997#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006998 ,
6999 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007000}
7001
Jonathan Peyton30419822017-05-12 18:01:32 +00007002int __kmp_invoke_teams_master(int gtid) {
7003 kmp_info_t *this_thr = __kmp_threads[gtid];
7004 kmp_team_t *team = this_thr->th.th_team;
7005#if KMP_DEBUG
7006 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7007 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7008 (void *)__kmp_teams_master);
7009#endif
7010 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7011 __kmp_teams_master(gtid);
7012 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7013 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007014}
7015#endif /* OMP_40_ENABLED */
7016
7017/* This sets the requested number of threads for the next parallel region
Jonathan Peyton30419822017-05-12 18:01:32 +00007018 encountered by this team. Since this should be enclosed in the fork/join
7019 critical section, it avoids race conditions with asymmetrical nested
7020 parallelism. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007021
Jonathan Peyton30419822017-05-12 18:01:32 +00007022void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7023 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007024
Jonathan Peyton30419822017-05-12 18:01:32 +00007025 if (num_threads > 0)
7026 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007027}
7028
7029#if OMP_40_ENABLED
7030
7031/* this sets the requested number of teams for the teams region and/or
Jonathan Peyton30419822017-05-12 18:01:32 +00007032 the number of threads for the next parallel region encountered */
7033void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7034 int num_threads) {
7035 kmp_info_t *thr = __kmp_threads[gtid];
7036 KMP_DEBUG_ASSERT(num_teams >= 0);
7037 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007038
Jonathan Peyton30419822017-05-12 18:01:32 +00007039 if (num_teams == 0)
7040 num_teams = 1; // default number of teams is 1.
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007041 if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
Jonathan Peyton30419822017-05-12 18:01:32 +00007042 if (!__kmp_reserve_warn) {
7043 __kmp_reserve_warn = 1;
7044 __kmp_msg(kmp_ms_warning,
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007045 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
Jonathan Peyton30419822017-05-12 18:01:32 +00007046 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007047 }
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007048 num_teams = __kmp_teams_max_nth;
Jonathan Peyton30419822017-05-12 18:01:32 +00007049 }
7050 // Set number of teams (number of threads in the outer "parallel" of the
7051 // teams)
7052 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007053
Jonathan Peyton30419822017-05-12 18:01:32 +00007054 // Remember the number of threads for inner parallel regions
7055 if (num_threads == 0) {
7056 if (!TCR_4(__kmp_init_middle))
7057 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
7058 num_threads = __kmp_avail_proc / num_teams;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007059 if (num_teams * num_threads > __kmp_teams_max_nth) {
Jonathan Peyton30419822017-05-12 18:01:32 +00007060 // adjust num_threads w/o warning as it is not a user setting
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007061 num_threads = __kmp_teams_max_nth / num_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007062 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007063 } else {
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007064 if (num_teams * num_threads > __kmp_teams_max_nth) {
7065 int new_threads = __kmp_teams_max_nth / num_teams;
Jonathan Peyton30419822017-05-12 18:01:32 +00007066 if (!__kmp_reserve_warn) { // user asked for too many threads
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007067 __kmp_reserve_warn = 1; // that conflicts with KMP_TEAMS_THREAD_LIMIT
Jonathan Peyton30419822017-05-12 18:01:32 +00007068 __kmp_msg(kmp_ms_warning,
7069 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7070 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7071 }
7072 num_threads = new_threads;
7073 }
7074 }
7075 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007076}
7077
Jim Cownie5e8470a2013-09-27 10:38:44 +00007078// Set the proc_bind var to use in the following parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007079void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7080 kmp_info_t *thr = __kmp_threads[gtid];
7081 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007082}
7083
7084#endif /* OMP_40_ENABLED */
7085
7086/* Launch the worker threads into the microtask. */
7087
Jonathan Peyton30419822017-05-12 18:01:32 +00007088void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7089 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007090
7091#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007092 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007093#endif /* KMP_DEBUG */
7094
Jonathan Peyton30419822017-05-12 18:01:32 +00007095 KMP_DEBUG_ASSERT(team);
7096 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7097 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7098 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007099
Jonathan Peyton30419822017-05-12 18:01:32 +00007100 team->t.t_construct = 0; /* no single directives seen yet */
7101 team->t.t_ordered.dt.t_value =
7102 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007103
Jonathan Peyton30419822017-05-12 18:01:32 +00007104 /* Reset the identifiers on the dispatch buffer */
7105 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7106 if (team->t.t_max_nproc > 1) {
7107 int i;
7108 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7109 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007110#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007111 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00007112#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007113 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007114 } else {
7115 team->t.t_disp_buffer[0].buffer_index = 0;
7116#if OMP_45_ENABLED
7117 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7118#endif
7119 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007120
Jonathan Peyton30419822017-05-12 18:01:32 +00007121 KMP_MB(); /* Flush all pending memory write invalidates. */
7122 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007123
7124#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007125 for (f = 0; f < team->t.t_nproc; f++) {
7126 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7127 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7128 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007129#endif /* KMP_DEBUG */
7130
Jonathan Peyton30419822017-05-12 18:01:32 +00007131 /* release the worker threads so they may begin working */
7132 __kmp_fork_barrier(gtid, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007133}
7134
Jonathan Peyton30419822017-05-12 18:01:32 +00007135void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7136 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007137
Jonathan Peyton30419822017-05-12 18:01:32 +00007138 KMP_DEBUG_ASSERT(team);
7139 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7140 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7141 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007142
Jonathan Peyton30419822017-05-12 18:01:32 +00007143/* Join barrier after fork */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007144
7145#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007146 if (__kmp_threads[gtid] &&
7147 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7148 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7149 __kmp_threads[gtid]);
7150 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7151 "team->t.t_nproc=%d\n",
7152 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7153 team->t.t_nproc);
7154 __kmp_print_structure();
7155 }
7156 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7157 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007158#endif /* KMP_DEBUG */
7159
Jonathan Peyton30419822017-05-12 18:01:32 +00007160 __kmp_join_barrier(gtid); /* wait for everyone */
Joachim Protze82e94a52017-11-01 10:08:30 +00007161#if OMPT_SUPPORT
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007162 if (ompt_enabled.enabled &&
7163 this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
7164 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7165 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007166 this_thr->th.ompt_thread_info.state = omp_state_overhead;
7167#if OMPT_OPTIONAL
7168 void *codeptr = NULL;
7169 if (KMP_MASTER_TID(ds_tid) &&
7170 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7171 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7172 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7173
7174 if (ompt_enabled.ompt_callback_sync_region_wait) {
7175 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007176 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007177 }
7178 if (ompt_enabled.ompt_callback_sync_region) {
7179 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007180 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007181 }
7182#endif
7183 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7184 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007185 ompt_scope_end, NULL, task_data, 0, ds_tid);
Joachim Protze82e94a52017-11-01 10:08:30 +00007186 }
Joachim Protze82e94a52017-11-01 10:08:30 +00007187 }
7188#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007189
Jonathan Peyton30419822017-05-12 18:01:32 +00007190 KMP_MB(); /* Flush all pending memory write invalidates. */
7191 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007192}
7193
Jim Cownie5e8470a2013-09-27 10:38:44 +00007194/* ------------------------------------------------------------------------ */
7195
7196#ifdef USE_LOAD_BALANCE
7197
Jim Cownie5e8470a2013-09-27 10:38:44 +00007198// Return the number of worker threads actively spinning in the hot team, if
7199// we are at the outermost level of parallelism. Otherwise, return 0.
Jonathan Peyton30419822017-05-12 18:01:32 +00007200static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7201 int i;
7202 int retval;
7203 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007204
Jonathan Peyton30419822017-05-12 18:01:32 +00007205 if (root->r.r_active) {
7206 return 0;
7207 }
7208 hot_team = root->r.r_hot_team;
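  // With an infinite blocktime (KMP_MAX_BLOCKTIME) workers never go to sleep,
  // so every worker in the hot team counts as active.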
7209 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7210 return hot_team->t.t_nproc - 1; // Don't count master thread
7211 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007212
Jonathan Peyton30419822017-05-12 18:01:32 +00007213 // Skip the master thread - it is accounted for elsewhere.
7214 retval = 0;
7215 for (i = 1; i < hot_team->t.t_nproc; i++) {
7216 if (hot_team->t.t_threads[i]->th.th_active) {
7217 retval++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007218 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007219 }
7220 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007221}
7222
Jim Cownie5e8470a2013-09-27 10:38:44 +00007223// Perform an automatic adjustment to the number of
7224// threads used by the next parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007225static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
7226 int retval;
7227 int pool_active;
7228 int hot_team_active;
7229 int team_curr_active;
7230 int system_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007231
Jonathan Peyton30419822017-05-12 18:01:32 +00007232 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7233 set_nproc));
7234 KMP_DEBUG_ASSERT(root);
7235 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7236 ->th.th_current_task->td_icvs.dynamic == TRUE);
7237 KMP_DEBUG_ASSERT(set_nproc > 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007238
Jonathan Peyton30419822017-05-12 18:01:32 +00007239 if (set_nproc == 1) {
7240 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
7241 return 1;
7242 }
7243
7244 // Threads that are active in the thread pool, active in the hot team for this
7245 // particular root (if we are at the outer par level), and the currently
7246 // executing thread (to become the master) are available to add to the new
7247 // team, but are currently contributing to the system load, and must be
7248 // accounted for.
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00007249 pool_active = __kmp_thread_pool_active_nth;
Jonathan Peyton30419822017-05-12 18:01:32 +00007250 hot_team_active = __kmp_active_hot_team_nproc(root);
7251 team_curr_active = pool_active + hot_team_active + 1;
7252
7253 // Check the system load.
7254 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7255 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
7256 "hot team active = %d\n",
7257 system_active, pool_active, hot_team_active));
7258
7259 if (system_active < 0) {
7260 // There was an error reading the necessary info from /proc, so use the
7261 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
7262 // = dynamic_thread_limit, we shouldn't wind up getting back here.
7263 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7264 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
7265
7266 // Make this call behave like the thread limit algorithm.
7267 retval = __kmp_avail_proc - __kmp_nth +
7268 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7269 if (retval > set_nproc) {
7270 retval = set_nproc;
7271 }
7272 if (retval < KMP_MIN_NTH) {
7273 retval = KMP_MIN_NTH;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007274 }
7275
Jonathan Peyton30419822017-05-12 18:01:32 +00007276 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7277 retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007278 return retval;
Jonathan Peyton30419822017-05-12 18:01:32 +00007279 }
7280
7281 // There is a slight delay in the load balance algorithm in detecting new
7282 // running procs. The real system load at this instant should be at least as
7283 // large as the number of active OMP threads available to add to the team.
7284 if (system_active < team_curr_active) {
7285 system_active = team_curr_active;
7286 }
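  // New team size: free procs (__kmp_avail_proc - system_active) plus the
  // threads already counted in system_active that will join the new team.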
7287 retval = __kmp_avail_proc - system_active + team_curr_active;
7288 if (retval > set_nproc) {
7289 retval = set_nproc;
7290 }
7291 if (retval < KMP_MIN_NTH) {
7292 retval = KMP_MIN_NTH;
7293 }
7294
7295 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
7296 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007297} // __kmp_load_balance_nproc()
7298
7299#endif /* USE_LOAD_BALANCE */
7300
Jim Cownie5e8470a2013-09-27 10:38:44 +00007301/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007302
7303/* NOTE: this is called with the __kmp_init_lock held */
Jonathan Peyton30419822017-05-12 18:01:32 +00007304void __kmp_cleanup(void) {
7305 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007306
Jonathan Peyton30419822017-05-12 18:01:32 +00007307 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007308
Jonathan Peyton30419822017-05-12 18:01:32 +00007309 if (TCR_4(__kmp_init_parallel)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007310#if KMP_HANDLE_SIGNALS
Jonathan Peyton30419822017-05-12 18:01:32 +00007311 __kmp_remove_signals();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007312#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007313 TCW_4(__kmp_init_parallel, FALSE);
7314 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007315
Jonathan Peyton30419822017-05-12 18:01:32 +00007316 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007317#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00007318 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007319#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton30419822017-05-12 18:01:32 +00007320 __kmp_cleanup_hierarchy();
7321 TCW_4(__kmp_init_middle, FALSE);
7322 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007323
Jonathan Peyton30419822017-05-12 18:01:32 +00007324 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007325
Jonathan Peyton30419822017-05-12 18:01:32 +00007326 if (__kmp_init_serial) {
7327 __kmp_runtime_destroy();
7328 __kmp_init_serial = FALSE;
7329 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007330
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00007331 __kmp_cleanup_threadprivate_caches();
7332
Jonathan Peyton30419822017-05-12 18:01:32 +00007333 for (f = 0; f < __kmp_threads_capacity; f++) {
7334 if (__kmp_root[f] != NULL) {
7335 __kmp_free(__kmp_root[f]);
7336 __kmp_root[f] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007337 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007338 }
7339 __kmp_free(__kmp_threads);
7340 // __kmp_threads and __kmp_root were allocated at once, as single block, so
7341 // there is no need in freeing __kmp_root.
7342 __kmp_threads = NULL;
7343 __kmp_root = NULL;
7344 __kmp_threads_capacity = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007345
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007346#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00007347 __kmp_cleanup_indirect_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007348#else
Jonathan Peyton30419822017-05-12 18:01:32 +00007349 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007350#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007351
Jonathan Peyton30419822017-05-12 18:01:32 +00007352#if KMP_AFFINITY_SUPPORTED
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00007353 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
Jonathan Peyton30419822017-05-12 18:01:32 +00007354 __kmp_cpuinfo_file = NULL;
7355#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007356
Jonathan Peyton30419822017-05-12 18:01:32 +00007357#if KMP_USE_ADAPTIVE_LOCKS
7358#if KMP_DEBUG_ADAPTIVE_LOCKS
7359 __kmp_print_speculative_stats();
7360#endif
7361#endif
7362 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7363 __kmp_nested_nth.nth = NULL;
7364 __kmp_nested_nth.size = 0;
7365 __kmp_nested_nth.used = 0;
7366 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7367 __kmp_nested_proc_bind.bind_types = NULL;
7368 __kmp_nested_proc_bind.size = 0;
7369 __kmp_nested_proc_bind.used = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007370
Jonathan Peyton30419822017-05-12 18:01:32 +00007371 __kmp_i18n_catclose();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007372
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007373#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007374 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007375#endif
7376
Jonathan Peyton30419822017-05-12 18:01:32 +00007377 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007378}
7379
7380/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00007381
7382int __kmp_ignore_mppbeg(void) {
7383 char *env;
7384
7385 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
7386 if (__kmp_str_match_false(env))
7387 return FALSE;
7388 }
7389 // By default __kmpc_begin() is no-op.
7390 return TRUE;
7391}
7392
7393int __kmp_ignore_mppend(void) {
7394 char *env;
7395
7396 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
7397 if (__kmp_str_match_false(env))
7398 return FALSE;
7399 }
7400 // By default __kmpc_end() is no-op.
7401 return TRUE;
7402}
7403
7404void __kmp_internal_begin(void) {
7405 int gtid;
7406 kmp_root_t *root;
7407
7408 /* this is a very important step as it will register new sibling threads
7409 and assign these new uber threads a new gtid */
7410 gtid = __kmp_entry_gtid();
7411 root = __kmp_threads[gtid]->th.th_root;
7412 KMP_ASSERT(KMP_UBER_GTID(gtid));
7413
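  // Double-checked locking: test r_begin once without the lock, then again
  // after acquiring r_begin_lock, so only the first caller marks the root.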
7414 if (root->r.r_begin)
7415 return;
7416 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7417 if (root->r.r_begin) {
7418 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7419 return;
7420 }
7421
7422 root->r.r_begin = TRUE;
7423
7424 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7425}
7426
Jim Cownie5e8470a2013-09-27 10:38:44 +00007427/* ------------------------------------------------------------------------ */
7428
Jonathan Peyton30419822017-05-12 18:01:32 +00007429void __kmp_user_set_library(enum library_type arg) {
7430 int gtid;
7431 kmp_root_t *root;
7432 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007433
Jonathan Peyton30419822017-05-12 18:01:32 +00007434 /* first, make sure we are initialized so we can get our gtid */
7435
7436 gtid = __kmp_entry_gtid();
7437 thread = __kmp_threads[gtid];
7438
7439 root = thread->th.th_root;
7440
7441 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7442 library_serial));
7443 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
7444 thread */
7445 KMP_WARNING(SetLibraryIncorrectCall);
7446 return;
7447 }
7448
7449 switch (arg) {
7450 case library_serial:
7451 thread->th.th_set_nproc = 0;
7452 set__nproc(thread, 1);
7453 break;
7454 case library_turnaround:
7455 thread->th.th_set_nproc = 0;
7456 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7457 : __kmp_dflt_team_nth_ub);
7458 break;
7459 case library_throughput:
7460 thread->th.th_set_nproc = 0;
7461 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7462 : __kmp_dflt_team_nth_ub);
7463 break;
7464 default:
7465 KMP_FATAL(UnknownLibraryType, arg);
7466 }
7467
7468 __kmp_aux_set_library(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007469}
7470
Jonathan Peyton30419822017-05-12 18:01:32 +00007471void __kmp_aux_set_stacksize(size_t arg) {
7472 if (!__kmp_init_serial)
7473 __kmp_serial_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007474
7475#if KMP_OS_DARWIN
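  // Round the requested size up to a 0x1000-byte (4 KB) page boundary, unless
  // adding another page would overflow the size_t argument.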
Jonathan Peyton30419822017-05-12 18:01:32 +00007476 if (arg & (0x1000 - 1)) {
7477 arg &= ~(0x1000 - 1);
7478 if (arg + 0x1000) /* check for overflow if we round up */
7479 arg += 0x1000;
7480 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007481#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007482 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007483
Jonathan Peyton30419822017-05-12 18:01:32 +00007484 /* only change the default stacksize before the first parallel region */
7485 if (!TCR_4(__kmp_init_parallel)) {
7486 size_t value = arg; /* argument is in bytes */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007487
Jonathan Peyton30419822017-05-12 18:01:32 +00007488 if (value < __kmp_sys_min_stksize)
7489 value = __kmp_sys_min_stksize;
7490 else if (value > KMP_MAX_STKSIZE)
7491 value = KMP_MAX_STKSIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007492
Jonathan Peyton30419822017-05-12 18:01:32 +00007493 __kmp_stksize = value;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007494
Jonathan Peyton30419822017-05-12 18:01:32 +00007495 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7496 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007497
Jonathan Peyton30419822017-05-12 18:01:32 +00007498 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007499}
7500
7501/* set the behaviour of the runtime library */
7502/* TODO this can cause some odd behaviour with sibling parallelism... */
Jonathan Peyton30419822017-05-12 18:01:32 +00007503void __kmp_aux_set_library(enum library_type arg) {
7504 __kmp_library = arg;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007505
Jonathan Peyton30419822017-05-12 18:01:32 +00007506 switch (__kmp_library) {
7507 case library_serial: {
7508 KMP_INFORM(LibraryIsSerial);
7509 (void)__kmp_change_library(TRUE);
7510 } break;
7511 case library_turnaround:
7512 (void)__kmp_change_library(TRUE);
7513 break;
7514 case library_throughput:
7515 (void)__kmp_change_library(FALSE);
7516 break;
7517 default:
7518 KMP_FATAL(UnknownLibraryType, arg);
7519 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007520}
7521
7522/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007523
Jonathan Peyton30419822017-05-12 18:01:32 +00007524void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
7525 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007526#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007527 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007528#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007529 int bt_set;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007530
Jonathan Peyton30419822017-05-12 18:01:32 +00007531 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007532
Jonathan Peyton30419822017-05-12 18:01:32 +00007533 /* Normalize and set blocktime for the teams */
7534 if (blocktime < KMP_MIN_BLOCKTIME)
7535 blocktime = KMP_MIN_BLOCKTIME;
7536 else if (blocktime > KMP_MAX_BLOCKTIME)
7537 blocktime = KMP_MAX_BLOCKTIME;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007538
Jonathan Peyton30419822017-05-12 18:01:32 +00007539 set__blocktime_team(thread->th.th_team, tid, blocktime);
7540 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007541
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007542#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007543 /* Calculate and set blocktime intervals for the teams */
7544 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007545
Jonathan Peyton30419822017-05-12 18:01:32 +00007546 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
7547 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007548#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007549
Jonathan Peyton30419822017-05-12 18:01:32 +00007550 /* Set whether blocktime has been set to "TRUE" */
7551 bt_set = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007552
Jonathan Peyton30419822017-05-12 18:01:32 +00007553 set__bt_set_team(thread->th.th_team, tid, bt_set);
7554 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007555#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007556 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
7557 "bt_intervals=%d, monitor_updates=%d\n",
7558 __kmp_gtid_from_tid(tid, thread->th.th_team),
7559 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7560 __kmp_monitor_wakeups));
Samuel Antao33515192016-10-20 13:20:17 +00007561#else
Jonathan Peyton30419822017-05-12 18:01:32 +00007562 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7563 __kmp_gtid_from_tid(tid, thread->th.th_team),
7564 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007565#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007566}
7567
Jonathan Peyton30419822017-05-12 18:01:32 +00007568void __kmp_aux_set_defaults(char const *str, int len) {
7569 if (!__kmp_init_serial) {
7570 __kmp_serial_initialize();
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00007571 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007572 __kmp_env_initialize(str);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007573
Jonathan Peyton30419822017-05-12 18:01:32 +00007574 if (__kmp_settings
Jim Cownie5e8470a2013-09-27 10:38:44 +00007575#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007576 || __kmp_display_env || __kmp_display_env_verbose
Jim Cownie5e8470a2013-09-27 10:38:44 +00007577#endif // OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007578 ) {
7579 __kmp_env_print();
7580 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007581} // __kmp_aux_set_defaults
7582
7583/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00007584/* internal fast reduction routines */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007585
Jim Cownie5e8470a2013-09-27 10:38:44 +00007586PACKED_REDUCTION_METHOD_T
Jonathan Peyton30419822017-05-12 18:01:32 +00007587__kmp_determine_reduction_method(
7588 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
7589 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7590 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007591
Jonathan Peyton30419822017-05-12 18:01:32 +00007592 // Default reduction method: critical construct ( lck != NULL, like in current
7593 // PAROPT )
7594 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
7595 // can be selected by RTL
7596 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
7597 // can be selected by RTL
7598 // Finally, it's up to OpenMP RTL to make a decision on which method to select
7599 // among generated by PAROPT.
Jim Cownie5e8470a2013-09-27 10:38:44 +00007600
Jonathan Peyton30419822017-05-12 18:01:32 +00007601 PACKED_REDUCTION_METHOD_T retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007602
Jonathan Peyton30419822017-05-12 18:01:32 +00007603 int team_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007604
Jonathan Peyton30419822017-05-12 18:01:32 +00007605 KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
7606 KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
Jim Cownie5e8470a2013-09-27 10:38:44 +00007607
Jonathan Peyton30419822017-05-12 18:01:32 +00007608#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
7609 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
7610#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
Jim Cownie5e8470a2013-09-27 10:38:44 +00007611
Jonathan Peyton30419822017-05-12 18:01:32 +00007612 retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007613
Jonathan Peyton30419822017-05-12 18:01:32 +00007614 // another choice of getting the team size (with 1 dynamic dereference) is slower
7615 team_size = __kmp_get_team_num_threads(global_tid);
7616 if (team_size == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007617
Jonathan Peyton30419822017-05-12 18:01:32 +00007618 retval = empty_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007619
Jonathan Peyton30419822017-05-12 18:01:32 +00007620 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007621
Jonathan Peyton30419822017-05-12 18:01:32 +00007622 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7623 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007624
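    // The platform-specific blocks below pick among critical, atomic, and tree
    // reduction based on team size, number of reduction variables, and the
    // total reduce data size.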
Jonathan Peyton30419822017-05-12 18:01:32 +00007625#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007626
Jonathan Peyton30419822017-05-12 18:01:32 +00007627#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \
7628 KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007629
Jonathan Peyton30419822017-05-12 18:01:32 +00007630 int teamsize_cutoff = 4;
Jonathan Peyton91b78702015-06-08 19:39:07 +00007631
Jonathan Peyton492e0a32017-06-13 17:17:26 +00007632#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00007633 if (__kmp_mic_type != non_mic) {
7634 teamsize_cutoff = 8;
7635 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007636#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007637 if (tree_available) {
7638 if (team_size <= teamsize_cutoff) {
7639 if (atomic_available) {
7640 retval = atomic_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007641 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007642 } else {
7643 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7644 }
7645 } else if (atomic_available) {
7646 retval = atomic_reduce_block;
7647 }
7648#else
7649#error "Unknown or unsupported OS"
7650#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
7651// KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007652
Jonathan Peyton30419822017-05-12 18:01:32 +00007653#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7654
7655#if KMP_OS_LINUX || KMP_OS_WINDOWS
7656
7657 // basic tuning
7658
7659 if (atomic_available) {
7660 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
7661 retval = atomic_reduce_block;
7662 }
7663 } // otherwise: use critical section
7664
7665#elif KMP_OS_DARWIN
7666
7667 if (atomic_available && (num_vars <= 3)) {
7668 retval = atomic_reduce_block;
7669 } else if (tree_available) {
7670 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
7671 (reduce_size < (2000 * sizeof(kmp_real64)))) {
7672 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7673 }
7674 } // otherwise: use critical section
7675
7676#else
7677#error "Unknown or unsupported OS"
7678#endif
7679
7680#else
7681#error "Unknown or unsupported architecture"
7682#endif
7683 }
7684
7685 // KMP_FORCE_REDUCTION
7686
7687 // If the team is serialized (team_size == 1), ignore the forced reduction
7688 // method and stay with the unsynchronized method (empty_reduce_block)
7689 if (__kmp_force_reduction_method != reduction_method_not_defined &&
7690 team_size != 1) {
7691
7692 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
7693
7694 int atomic_available, tree_available;
7695
7696 switch ((forced_retval = __kmp_force_reduction_method)) {
7697 case critical_reduce_block:
7698 KMP_ASSERT(lck); // lck should be != 0
7699 break;
7700
7701 case atomic_reduce_block:
7702 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7703 if (!atomic_available) {
7704 KMP_WARNING(RedMethodNotSupported, "atomic");
7705 forced_retval = critical_reduce_block;
7706 }
7707 break;
7708
7709 case tree_reduce_block:
7710 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7711 if (!tree_available) {
7712 KMP_WARNING(RedMethodNotSupported, "tree");
7713 forced_retval = critical_reduce_block;
7714 } else {
7715#if KMP_FAST_REDUCTION_BARRIER
7716 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7717#endif
7718 }
7719 break;
7720
7721 default:
7722 KMP_ASSERT(0); // "unsupported method specified"
Jim Cownie5e8470a2013-09-27 10:38:44 +00007723 }
7724
Jonathan Peyton30419822017-05-12 18:01:32 +00007725 retval = forced_retval;
7726 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007727
Jonathan Peyton30419822017-05-12 18:01:32 +00007728 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007729
Jonathan Peyton30419822017-05-12 18:01:32 +00007730#undef FAST_REDUCTION_TREE_METHOD_GENERATED
7731#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7732
7733 return (retval);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007734}
7735
7736// this function is for testing set/get/determine reduce method
Jonathan Peyton30419822017-05-12 18:01:32 +00007737kmp_int32 __kmp_get_reduce_method(void) {
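  // The packed field keeps the barrier pattern in the low byte and the
  // reduction method in the bits above it; shift the method down here.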
7738 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007739}