/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
                                                        "5.0 (201611)";
#elif OMP_45_ENABLED
                                                        "4.5 (201511)";
#elif OMP_40_ENABLED
                                                        "4.0 (201307)";
#else
                                                        "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

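/* Thread descriptor used for the runtime's monitor thread (the thread with
   gtid KMP_GTID_MONITOR); it is kept outside the __kmp_threads array. */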
kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
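/* Lookup order (controlled by __kmp_gtid_mode): mode >= 3 reads the gtid from
   a dedicated thread-local variable (KMP_TDATA_GTID), mode >= 2 uses the keyed
   TLS value via __kmp_gtid_get_specific(), and otherwise the gtid is inferred
   by locating the current stack address within a registered thread's stack. */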
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
        non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
        may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

  /* if we haven't been assigned a gtid, then return the code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}

int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}

/* caller must hold forkjoin_lock */
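/* Verify that this thread's stack does not overlap the stack of any other
   registered thread; if an overlap is found, print the offending ranges (when
   storage maps are enabled) and abort with a StackOverlap fatal message. The
   extensive check is only done when __kmp_env_checks is set and the thread is
   not an uber thread. */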
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}

/* ------------------------------------------------------------------------ */

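/* Park the calling thread in a yield loop that never exits; used on the abort
   paths below where returning to the caller would be unsafe. */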
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}

#define MAX_MESSAGE 512

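/* Emit one "OMP storage map:" line describing the address range [p1, p2] of
   the given size, serialized by the stdio bootstrap lock. When
   KMP_PRINT_DATA_PLACEMENT is enabled the host node of the underlying pages
   is reported as well. */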
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}

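/* printf-style runtime warning, written to kmp_err under the stdio bootstrap
   lock; it is a no-op when warning generation has been turned off. */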
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;

    /* On Windows* OS, by default abort() causes a pop-up error box, which
       stalls nightly testing. Unfortunately, we cannot reliably suppress pop-up
       error boxes. _set_abort_behavior() works well, but this function is not
       available in VS7 (this is not a problem for the DLL, but it is a problem
       for the static OpenMP RTL). SetErrorMode (and so, the timelimit utility)
       does not help, at least in some versions of the MS C RTL.

       It seems the following sequence is the only way to simulate abort() and
       avoid the pop-up error box. */
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

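/* Hooks for allocator setup/teardown; currently no-ops. */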
static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). The OS terminates the other threads
  // (except the one calling ProcessExit or FreeLibrary), so it might seem safe
  // to access __kmp_threads[] without taking the forkjoin_lock. However, some
  // threads may still be alive here, although they are about to be terminated.
  // The threads in the array with ds_thread==0 are the most suspicious, so it
  // may not actually be safe to access __kmp_threads[].

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}


BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      //   lpReserved == NULL when FreeLibrary() was called,
      //   lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear triggering the problem of unreleased forkjoin
      // lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like it's not a corner
      // case, but common cases:
      //   - the main function was compiled by an alternative compiler;
      //   - the main function was compiled by icl but without /Qopenmp
      //     (application with plugins);
      //   - application terminates by calling C exit(), Fortran CALL EXIT() or
      //     Fortran STOP.
      //   - alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // return previous setting of whether
  // KMP_LIBRARY=throughput
}

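/* The two routines below implement the "ordered" hand-off inside a parallel
   loop: __kmp_parallel_deo spins (yielding) until t_ordered.dt.t_value equals
   this thread's tid, and __kmp_parallel_dxo then advances that value to the
   next tid in the team, releasing the next thread in order. */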
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                           th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
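
/* A sketch of the intended pairing (illustrative only): the thread for which
   __kmp_enter_single() returns nonzero executes the single block and then
   calls __kmp_exit_single(); all other threads skip both.

     if (__kmp_enter_single(gtid, loc, TRUE)) {
       // ... body of the single construct ...
       __kmp_exit_single(gtid);
     }
*/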

/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
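/* The requested thread count is clipped, in order, by the dynamic adjustment
   mode (load balance / thread limit / random), the device-wide limit
   (KMP_DEVICE_THREAD_LIMIT / KMP_ALL_THREADS), the contention-group limit
   (OMP_THREAD_LIMIT), and finally the capacity of the __kmp_threads array. */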
802static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
803 int master_tid, int set_nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +0000804#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000805 ,
806 int enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +0000807#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +0000808 ) {
809 int capacity;
810 int new_nthreads;
811 KMP_DEBUG_ASSERT(__kmp_init_serial);
812 KMP_DEBUG_ASSERT(root && parent_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000813
Jonathan Peyton30419822017-05-12 18:01:32 +0000814 // If dyn-var is set, dynamically adjust the number of desired threads,
815 // according to the method specified by dynamic_mode.
816 new_nthreads = set_nthreads;
817 if (!get__dynamic_2(parent_team, master_tid)) {
818 ;
819 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000820#ifdef USE_LOAD_BALANCE
Jonathan Peyton30419822017-05-12 18:01:32 +0000821 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
822 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
823 if (new_nthreads == 1) {
824 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
825 "reservation to 1 thread\n",
826 master_tid));
827 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000828 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000829 if (new_nthreads < set_nthreads) {
830 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
831 "reservation to %d threads\n",
832 master_tid, new_nthreads));
833 }
834 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000835#endif /* USE_LOAD_BALANCE */
Jonathan Peyton30419822017-05-12 18:01:32 +0000836 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
837 new_nthreads = __kmp_avail_proc - __kmp_nth +
838 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
839 if (new_nthreads <= 1) {
840 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
841 "reservation to 1 thread\n",
842 master_tid));
843 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000844 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000845 if (new_nthreads < set_nthreads) {
846 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
847 "reservation to %d threads\n",
848 master_tid, new_nthreads));
849 } else {
850 new_nthreads = set_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000851 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000852 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
853 if (set_nthreads > 2) {
854 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
855 new_nthreads = (new_nthreads % set_nthreads) + 1;
856 if (new_nthreads == 1) {
857 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
858 "reservation to 1 thread\n",
859 master_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000860 return 1;
Jonathan Peyton30419822017-05-12 18:01:32 +0000861 }
862 if (new_nthreads < set_nthreads) {
863 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
864 "reservation to %d threads\n",
865 master_tid, new_nthreads));
866 }
867 }
868 } else {
869 KMP_ASSERT(0);
870 }
871
Jonathan Peytonf4392462017-07-27 20:58:41 +0000872 // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
Jonathan Peyton30419822017-05-12 18:01:32 +0000873 if (__kmp_nth + new_nthreads -
874 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
875 __kmp_max_nth) {
876 int tl_nthreads = __kmp_max_nth - __kmp_nth +
877 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
878 if (tl_nthreads <= 0) {
879 tl_nthreads = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000880 }
881
Jonathan Peyton30419822017-05-12 18:01:32 +0000882 // If dyn-var is false, emit a 1-time warning.
883 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
884 __kmp_reserve_warn = 1;
885 __kmp_msg(kmp_ms_warning,
886 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
887 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
888 }
889 if (tl_nthreads == 1) {
Jonathan Peytonf4392462017-07-27 20:58:41 +0000890 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
891 "reduced reservation to 1 thread\n",
Jonathan Peyton30419822017-05-12 18:01:32 +0000892 master_tid));
893 return 1;
894 }
Jonathan Peytonf4392462017-07-27 20:58:41 +0000895 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
896 "reservation to %d threads\n",
897 master_tid, tl_nthreads));
898 new_nthreads = tl_nthreads;
899 }
900
901 // Respect OMP_THREAD_LIMIT
902 if (root->r.r_cg_nthreads + new_nthreads -
903 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
904 __kmp_cg_max_nth) {
905 int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
906 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
907 if (tl_nthreads <= 0) {
908 tl_nthreads = 1;
909 }
910
911 // If dyn-var is false, emit a 1-time warning.
912 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
913 __kmp_reserve_warn = 1;
914 __kmp_msg(kmp_ms_warning,
915 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
916 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
917 }
918 if (tl_nthreads == 1) {
919 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
920 "reduced reservation to 1 thread\n",
921 master_tid));
922 return 1;
923 }
924 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
Jonathan Peyton30419822017-05-12 18:01:32 +0000925 "reservation to %d threads\n",
926 master_tid, tl_nthreads));
927 new_nthreads = tl_nthreads;
928 }
929
930 // Check if the threads array is large enough, or needs expanding.
Jonathan Peyton30419822017-05-12 18:01:32 +0000931 // See comment in __kmp_register_root() about the adjustment if
932 // __kmp_threads[0] == NULL.
933 capacity = __kmp_threads_capacity;
934 if (TCR_PTR(__kmp_threads[0]) == NULL) {
935 --capacity;
936 }
937 if (__kmp_nth + new_nthreads -
938 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
939 capacity) {
940 // Expand the threads array.
941 int slotsRequired = __kmp_nth + new_nthreads -
942 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
943 capacity;
944 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
945 if (slotsAdded < slotsRequired) {
946 // The threads array was not expanded enough.
947 new_nthreads -= (slotsRequired - slotsAdded);
948 KMP_ASSERT(new_nthreads >= 1);
949
950 // If dyn-var is false, emit a 1-time warning.
951 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
952 __kmp_reserve_warn = 1;
953 if (__kmp_tp_cached) {
954 __kmp_msg(kmp_ms_warning,
955 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
956 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
957 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
958 } else {
959 __kmp_msg(kmp_ms_warning,
960 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
961 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
962 }
963 }
964 }
965 }
966
Jonathan Peyton642688b2017-06-01 16:46:36 +0000967#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +0000968 if (new_nthreads == 1) {
969 KC_TRACE(10,
970 ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
971 "dead roots and rechecking; requested %d threads\n",
972 __kmp_get_gtid(), set_nthreads));
Jonathan Peyton642688b2017-06-01 16:46:36 +0000973 } else {
974 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
975 " %d threads\n",
976 __kmp_get_gtid(), new_nthreads, set_nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +0000977 }
Jonathan Peyton642688b2017-06-01 16:46:36 +0000978#endif // KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +0000979 return new_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000980}
981
/* Allocate threads from the thread pool and assign them to the new team. We are
   assured that there are enough threads available, because we checked on that
   earlier within the forkjoin critical section. */
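/* A "hot" team is one kept alive between parallel regions so its threads do
   not have to be re-forked; when the target team is a hot team the worker
   install loop below is skipped. */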
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's set up the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team
// structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team
    // during the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1145
Jonathan Peyton30419822017-05-12 18:01:32 +00001146static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1147 int realloc); // forward declaration
Jim Cownie5e8470a2013-09-27 10:38:44 +00001148
Jonathan Peyton30419822017-05-12 18:01:32 +00001149/* Run a parallel region that has been serialized, so runs only in a team of the
1150 single master thread. */
1151void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1152 kmp_info_t *this_thr;
1153 kmp_team_t *serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001154
Jonathan Peyton30419822017-05-12 18:01:32 +00001155 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001156
Jonathan Peyton30419822017-05-12 18:01:32 +00001157 /* Skip all this code for autopar serialized loops since it results in
1158 unacceptable overhead */
1159 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1160 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001161
Jonathan Peyton30419822017-05-12 18:01:32 +00001162 if (!TCR_4(__kmp_init_parallel))
1163 __kmp_parallel_initialize();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001164
Jonathan Peyton30419822017-05-12 18:01:32 +00001165 this_thr = __kmp_threads[global_tid];
1166 serial_team = this_thr->th.th_serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001167
Jonathan Peyton30419822017-05-12 18:01:32 +00001168 /* utilize the serialized team held by this thread */
1169 KMP_DEBUG_ASSERT(serial_team);
1170 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001171
Jonathan Peyton30419822017-05-12 18:01:32 +00001172 if (__kmp_tasking_mode != tskm_immediate_exec) {
1173 KMP_DEBUG_ASSERT(
1174 this_thr->th.th_task_team ==
1175 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1176 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1177 NULL);
1178 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1179 "team %p, new task_team = NULL\n",
1180 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1181 this_thr->th.th_task_team = NULL;
1182 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001183
1184#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001185 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1186 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1187 proc_bind = proc_bind_false;
1188 } else if (proc_bind == proc_bind_default) {
1189 // No proc_bind clause was specified, so use the current value
1190 // of proc-bind-var for this parallel region.
1191 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1192 }
1193 // Reset for next parallel region
1194 this_thr->th.th_set_proc_bind = proc_bind_default;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001195#endif /* OMP_40_ENABLED */
1196
Joachim Protze82e94a52017-11-01 10:08:30 +00001197#if OMPT_SUPPORT
1198 ompt_data_t ompt_parallel_data;
1199 ompt_parallel_data.ptr = NULL;
1200 ompt_data_t *implicit_task_data;
1201 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1202 if (ompt_enabled.enabled &&
1203 this_thr->th.ompt_thread_info.state != omp_state_overhead) {
1204
1205 ompt_task_info_t *parent_task_info;
1206 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1207
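    // Record the parent task's re-entry frame so an OMPT tool can tell
    // runtime frames apart from application frames.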
1208 parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
1209 if (ompt_enabled.ompt_callback_parallel_begin) {
1210 int team_size = 1;
1211
1212 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1213 &(parent_task_info->task_data), &(parent_task_info->frame),
1214 &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
1215 }
1216 }
1217#endif // OMPT_SUPPORT
1218
Jonathan Peyton30419822017-05-12 18:01:32 +00001219 if (this_thr->th.th_team != serial_team) {
1220 // Nested level will be an index in the nested nthreads array
1221 int level = this_thr->th.th_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001222
Jonathan Peyton30419822017-05-12 18:01:32 +00001223 if (serial_team->t.t_serialized) {
1224 /* this serial team was already used
1225         TODO increase performance by making these locks more specific */
1226 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001227
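      // Allocate a fresh one-thread team under the forkjoin lock; it is then
      // installed as this thread's serialized team for the new nesting level.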
Jonathan Peyton30419822017-05-12 18:01:32 +00001228 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001229
Jonathan Peyton30419822017-05-12 18:01:32 +00001230 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001231#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001232 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001233#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001234#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001235 proc_bind,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001236#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001237 &this_thr->th.th_current_task->td_icvs,
1238 0 USE_NESTED_HOT_ARG(NULL));
1239 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1240 KMP_ASSERT(new_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001241
Jonathan Peyton30419822017-05-12 18:01:32 +00001242 /* setup new serialized team and install it */
1243 new_team->t.t_threads[0] = this_thr;
1244 new_team->t.t_parent = this_thr->th.th_team;
1245 serial_team = new_team;
1246 this_thr->th.th_serial_team = serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001247
Jonathan Peyton30419822017-05-12 18:01:32 +00001248 KF_TRACE(
1249 10,
1250 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1251 global_tid, serial_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001252
Jonathan Peyton30419822017-05-12 18:01:32 +00001253 /* TODO the above breaks the requirement that if we run out of resources,
1254 then we can still guarantee that serialized teams are ok, since we may
1255 need to allocate a new one */
1256 } else {
1257 KF_TRACE(
1258 10,
1259 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1260 global_tid, serial_team));
1261 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001262
Jonathan Peyton30419822017-05-12 18:01:32 +00001263 /* we have to initialize this serial team */
1264 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1265 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1266 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1267 serial_team->t.t_ident = loc;
1268 serial_team->t.t_serialized = 1;
1269 serial_team->t.t_nproc = 1;
1270 serial_team->t.t_parent = this_thr->th.th_team;
1271 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
1272 this_thr->th.th_team = serial_team;
1273 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001274
Jonathan Peyton30419822017-05-12 18:01:32 +00001275    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1276 this_thr->th.th_current_task));
1277 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1278 this_thr->th.th_current_task->td_flags.executing = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001279
Jonathan Peyton30419822017-05-12 18:01:32 +00001280 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001281
Jonathan Peyton30419822017-05-12 18:01:32 +00001282 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1283 implicit task for each serialized task represented by
1284 team->t.t_serialized? */
1285 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1286 &this_thr->th.th_current_task->td_parent->td_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001287
Jonathan Peyton30419822017-05-12 18:01:32 +00001288 // Thread value exists in the nested nthreads array for the next nested
1289 // level
1290 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1291 this_thr->th.th_current_task->td_icvs.nproc =
1292 __kmp_nested_nth.nth[level + 1];
1293 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001294
1295#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001296 if (__kmp_nested_proc_bind.used &&
1297 (level + 1 < __kmp_nested_proc_bind.used)) {
1298 this_thr->th.th_current_task->td_icvs.proc_bind =
1299 __kmp_nested_proc_bind.bind_types[level + 1];
1300 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001301#endif /* OMP_40_ENABLED */
1302
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001303#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00001304 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001305#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001306 this_thr->th.th_info.ds.ds_tid = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001307
Jonathan Peyton30419822017-05-12 18:01:32 +00001308 /* set thread cache values */
1309 this_thr->th.th_team_nproc = 1;
1310 this_thr->th.th_team_master = this_thr;
1311 this_thr->th.th_team_serialized = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001312
Jonathan Peyton30419822017-05-12 18:01:32 +00001313 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1314 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001315
Jonathan Peyton30419822017-05-12 18:01:32 +00001316 propagateFPControl(serial_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001317
Jonathan Peyton30419822017-05-12 18:01:32 +00001318 /* check if we need to allocate dispatch buffers stack */
1319 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1320 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1321 serial_team->t.t_dispatch->th_disp_buffer =
1322 (dispatch_private_info_t *)__kmp_allocate(
1323 sizeof(dispatch_private_info_t));
1324 }
1325 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001326
Jonathan Peyton30419822017-05-12 18:01:32 +00001327 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001328
Jonathan Peyton30419822017-05-12 18:01:32 +00001329 } else {
1330 /* this serialized team is already being used,
1331 * that's fine, just add another nested level */
1332 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1333 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1334 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
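    // t_serialized acts as a nesting counter here: each further serialized
    // region bumps it, and the matching end of the region is expected to
    // decrement it.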
1335 ++serial_team->t.t_serialized;
1336 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001337
Jonathan Peyton30419822017-05-12 18:01:32 +00001338 // Nested level will be an index in the nested nthreads array
1339 int level = this_thr->th.th_team->t.t_level;
1340 // Thread value exists in the nested nthreads array for the next nested
1341 // level
1342 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1343 this_thr->th.th_current_task->td_icvs.nproc =
1344 __kmp_nested_nth.nth[level + 1];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001345 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001346 serial_team->t.t_level++;
1347 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1348 "of serial team %p to %d\n",
1349 global_tid, serial_team, serial_team->t.t_level));
1350
1351 /* allocate/push dispatch buffers stack */
1352 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1353 {
1354 dispatch_private_info_t *disp_buffer =
1355 (dispatch_private_info_t *)__kmp_allocate(
1356 sizeof(dispatch_private_info_t));
1357 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1358 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1359 }
1360 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1361
1362 KMP_MB();
1363 }
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001364#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001365 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001366#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001367
Jonathan Peyton30419822017-05-12 18:01:32 +00001368 if (__kmp_env_consistency_check)
1369 __kmp_push_parallel(global_tid, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001370#if OMPT_SUPPORT
1371 serial_team->t.ompt_team_info.master_return_address = codeptr;
1372 if (ompt_enabled.enabled &&
1373 this_thr->th.ompt_thread_info.state != omp_state_overhead) {
1374 OMPT_CUR_TASK_INFO(this_thr)
1375 ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
1376
1377 ompt_lw_taskteam_t lw_taskteam;
1378 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1379 &ompt_parallel_data, codeptr);
1380
1381 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1382    // don't use lw_taskteam after linking. Its content was swapped.
1383
1384 /* OMPT implicit task begin */
1385 implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
1386 if (ompt_enabled.ompt_callback_implicit_task) {
1387 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1388 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1389 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
1390 }
1391
1392 /* OMPT state */
1393 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
1394 OMPT_CUR_TASK_INFO(this_thr)
1395 ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
1396 }
1397#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001398}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001399
Jim Cownie5e8470a2013-09-27 10:38:44 +00001400/* most of the work for a fork */
1401/* return true if we really went parallel, false if serialized */
Jonathan Peyton30419822017-05-12 18:01:32 +00001402int __kmp_fork_call(ident_t *loc, int gtid,
1403 enum fork_context_e call_context, // Intel, GNU, ...
Joachim Protze82e94a52017-11-01 10:08:30 +00001404 kmp_int32 argc, microtask_t microtask, launch_t invoker,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001405/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001406#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001407 va_list *ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001408#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001409 va_list ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001410#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001411 ) {
1412 void **argv;
1413 int i;
1414 int master_tid;
1415 int master_this_cons;
1416 kmp_team_t *team;
1417 kmp_team_t *parent_team;
1418 kmp_info_t *master_th;
1419 kmp_root_t *root;
1420 int nthreads;
1421 int master_active;
1422 int master_set_numthreads;
1423 int level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001424#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001425 int active_level;
1426 int teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001427#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001428#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001429 kmp_hot_team_ptr_t **p_hot_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001430#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001431 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001432 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001433 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001434
Jonathan Peyton30419822017-05-12 18:01:32 +00001435 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1436 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1437 /* Some systems prefer the stack for the root thread(s) to start with */
1438 /* some gap from the parent stack to prevent false sharing. */
1439 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1440 /* These 2 lines below are so this does not get optimized out */
1441 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1442 __kmp_stkpadding += (short)((kmp_int64)dummy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001443 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001444
1445 /* initialize if needed */
Jonathan Peyton30419822017-05-12 18:01:32 +00001446 KMP_DEBUG_ASSERT(
1447 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1448 if (!TCR_4(__kmp_init_parallel))
1449 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001450
1451 /* setup current data */
Jonathan Peyton30419822017-05-12 18:01:32 +00001452 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
1453 // shutdown
1454 parent_team = master_th->th.th_team;
1455 master_tid = master_th->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001456 master_this_cons = master_th->th.th_local.this_construct;
Jonathan Peyton30419822017-05-12 18:01:32 +00001457 root = master_th->th.th_root;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001458 master_active = root->r.r_active;
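    // th_set_nproc carries the value supplied by a num_threads clause
    // (0 if none was given); it is consumed and reset further below.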
1459 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001460
1461#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001462 ompt_data_t ompt_parallel_data;
1463 ompt_parallel_data.ptr = NULL;
1464 ompt_data_t *parent_task_data;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001465 ompt_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001466 ompt_data_t *implicit_task_data;
1467 void *return_address = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001468
Joachim Protze82e94a52017-11-01 10:08:30 +00001469 if (ompt_enabled.enabled) {
1470 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1471 NULL, NULL);
1472 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001473 }
1474#endif
1475
Jim Cownie5e8470a2013-09-27 10:38:44 +00001476 // Nested level will be an index in the nested nthreads array
Jonathan Peyton30419822017-05-12 18:01:32 +00001477 level = parent_team->t.t_level;
1478 // used to launch non-serial teams even if nested is not allowed
1479 active_level = parent_team->t.t_active_level;
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001480#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00001481 // needed to check nesting inside the teams
1482 teams_level = master_th->th.th_teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001483#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001484#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001485 p_hot_teams = &master_th->th.th_hot_teams;
1486 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1487 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1488 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1489 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
Jonathan Peyton642688b2017-06-01 16:46:36 +00001490 // it is either actual or not needed (when active_level > 0)
1491 (*p_hot_teams)[0].hot_team_nth = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001492 }
1493#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001494
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001495#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001496 if (ompt_enabled.enabled) {
1497 if (ompt_enabled.ompt_callback_parallel_begin) {
1498 int team_size = master_set_numthreads
1499 ? master_set_numthreads
1500 : get__nproc_2(parent_team, master_tid);
1501 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1502 parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
1503 OMPT_INVOKER(call_context), return_address);
1504 }
1505 master_th->th.ompt_thread_info.state = omp_state_overhead;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001506 }
1507#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001508
Jim Cownie5e8470a2013-09-27 10:38:44 +00001509 master_th->th.th_ident = loc;
1510
1511#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001512 if (master_th->th.th_teams_microtask && ap &&
1513 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1514 // AC: This is start of parallel that is nested inside teams construct.
1515 // The team is actual (hot), all workers are ready at the fork barrier.
1516 // No lock needed to initialize the team a bit, then free workers.
1517 parent_team->t.t_ident = loc;
1518 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1519 parent_team->t.t_argc = argc;
1520 argv = (void **)parent_team->t.t_argv;
1521 for (i = argc - 1; i >= 0; --i)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001522/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001523#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001524 *argv++ = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001525#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001526 *argv++ = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001527#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001528      // Increment our nested depth levels, but do not increase the serialization
1529 if (parent_team == master_th->th.th_serial_team) {
1530 // AC: we are in serialized parallel
1531 __kmpc_serialized_parallel(loc, gtid);
1532 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1533        // AC: need this in order for enquiry functions to work
1534        // correctly; will restore at join time
1535 parent_team->t.t_serialized--;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001536#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001537 void *dummy;
1538 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001539
Jonathan Peyton30419822017-05-12 18:01:32 +00001540 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001541
Joachim Protze82e94a52017-11-01 10:08:30 +00001542 if (ompt_enabled.enabled) {
1543 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1544 &ompt_parallel_data, return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001545 exit_runtime_p =
1546 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001547
Joachim Protze82e94a52017-11-01 10:08:30 +00001548 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1549          // don't use lw_taskteam after linking. Its content was swapped.
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001550
Jonathan Peyton30419822017-05-12 18:01:32 +00001551 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001552 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1553 if (ompt_enabled.ompt_callback_implicit_task) {
1554 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1555 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1556 implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
Jonathan Peyton30419822017-05-12 18:01:32 +00001557 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001558
Jonathan Peyton30419822017-05-12 18:01:32 +00001559 /* OMPT state */
Joachim Protze82e94a52017-11-01 10:08:30 +00001560 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001561 } else {
1562 exit_runtime_p = &dummy;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001563 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001564#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001565
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001566 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001567 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1568 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1569 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1570#if OMPT_SUPPORT
1571 ,
1572 exit_runtime_p
1573#endif
1574 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001575 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001576
Jonathan Peyton30419822017-05-12 18:01:32 +00001577#if OMPT_SUPPORT
1578 *exit_runtime_p = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001579 if (ompt_enabled.enabled) {
1580 OMPT_CUR_TASK_INFO(master_th)->frame.exit_runtime_frame = NULL;
1581 if (ompt_enabled.ompt_callback_implicit_task) {
1582 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1583 ompt_scope_end, NULL, implicit_task_data, 1,
1584 __kmp_tid_from_gtid(gtid));
Jonathan Peyton30419822017-05-12 18:01:32 +00001585 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001586 __ompt_lw_taskteam_unlink(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001587
Joachim Protze82e94a52017-11-01 10:08:30 +00001588 if (ompt_enabled.ompt_callback_parallel_end) {
1589 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1590 OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
1591 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001592 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001593 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001594 }
1595#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001596 return TRUE;
Jonathan Peyton30419822017-05-12 18:01:32 +00001597 }
1598
1599 parent_team->t.t_pkfn = microtask;
Jonathan Peyton30419822017-05-12 18:01:32 +00001600 parent_team->t.t_invoke = invoker;
1601 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
1602 parent_team->t.t_active_level++;
1603 parent_team->t.t_level++;
1604
1605 /* Change number of threads in the team if requested */
1606 if (master_set_numthreads) { // The parallel has num_threads clause
1607 if (master_set_numthreads < master_th->th.th_teams_size.nth) {
1608          // AC: can only reduce the number of threads dynamically, cannot increase
1609 kmp_info_t **other_threads = parent_team->t.t_threads;
1610 parent_team->t.t_nproc = master_set_numthreads;
1611 for (i = 0; i < master_set_numthreads; ++i) {
1612 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1613 }
1614 // Keep extra threads hot in the team for possible next parallels
1615 }
1616 master_th->th.th_set_nproc = 0;
1617 }
1618
1619#if USE_DEBUGGER
1620 if (__kmp_debugging) { // Let debugger override number of threads.
1621 int nth = __kmp_omp_num_threads(loc);
Jonathan Peyton642688b2017-06-01 16:46:36 +00001622 if (nth > 0) { // 0 means debugger doesn't want to change num threads
Jonathan Peyton30419822017-05-12 18:01:32 +00001623 master_set_numthreads = nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001624 }
1625 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001626#endif
1627
1628 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
1629 "master_th=%p, gtid=%d\n",
1630 root, parent_team, master_th, gtid));
1631 __kmp_internal_fork(loc, gtid, parent_team);
1632 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
1633 "master_th=%p, gtid=%d\n",
1634 root, parent_team, master_th, gtid));
1635
1636 /* Invoke microtask for MASTER thread */
1637 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
1638 parent_team->t.t_id, parent_team->t.t_pkfn));
1639
1640 {
1641 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1642 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1643 if (!parent_team->t.t_invoke(gtid)) {
1644 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
1645 }
1646 }
1647 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
1648 parent_team->t.t_id, parent_team->t.t_pkfn));
1649 KMP_MB(); /* Flush all pending memory write invalidates. */
1650
1651 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
1652
1653 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001654 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001655#endif /* OMP_40_ENABLED */
1656
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001657#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001658 if (__kmp_tasking_mode != tskm_immediate_exec) {
1659 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1660 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001661 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001662#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001663
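    // If this region is already nested at max-active-levels-var, it must run
    // serialized with a single thread.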
Jonathan Peyton30419822017-05-12 18:01:32 +00001664 if (parent_team->t.t_active_level >=
1665 master_th->th.th_current_task->td_icvs.max_active_levels) {
1666 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001667 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001668#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001669 int enter_teams = ((ap == NULL && active_level == 0) ||
1670 (ap && teams_level > 0 && teams_level == level));
Andrey Churbanov92effc42015-08-18 10:08:27 +00001671#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001672 nthreads =
1673 master_set_numthreads
1674 ? master_set_numthreads
1675 : get__nproc_2(
1676 parent_team,
1677 master_tid); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001678
Jonathan Peyton30419822017-05-12 18:01:32 +00001679 // Check if we need to take forkjoin lock? (no need for serialized
1680 // parallel out of teams construct). This code moved here from
1681 // __kmp_reserve_threads() to speedup nested serialized parallels.
1682 if (nthreads > 1) {
1683 if ((!get__nested(master_th) && (root->r.r_in_parallel
Andrey Churbanov92effc42015-08-18 10:08:27 +00001684#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001685 && !enter_teams
Andrey Churbanov92effc42015-08-18 10:08:27 +00001686#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001687 )) ||
1688 (__kmp_library == library_serial)) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001689 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
1690 " threads\n",
1691 gtid, nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +00001692 nthreads = 1;
Andrey Churbanov92effc42015-08-18 10:08:27 +00001693 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001694 }
1695 if (nthreads > 1) {
1696 /* determine how many new threads we can use */
1697 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jonathan Peyton30419822017-05-12 18:01:32 +00001698 nthreads = __kmp_reserve_threads(
1699 root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001700#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001701 /* AC: If we execute teams from parallel region (on host), then
1702 teams should be created but each can only have 1 thread if
1703                 nesting is disabled. If teams is called from a serial region, then
1704 teams and their threads should be created regardless of the
1705 nesting setting. */
1706 ,
1707 enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001708#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001709 );
1710 if (nthreads == 1) {
1711 // Free lock for single thread execution here; for multi-thread
1712 // execution it will be freed later after team of threads created
1713 // and initialized
1714 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Andrey Churbanov92effc42015-08-18 10:08:27 +00001715 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001716 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001717 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001718 KMP_DEBUG_ASSERT(nthreads > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001719
Jonathan Peyton30419822017-05-12 18:01:32 +00001720 // If we temporarily changed the set number of threads then restore it now
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001721 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001722
Jim Cownie5e8470a2013-09-27 10:38:44 +00001723 /* create a serialized parallel region? */
Jonathan Peyton30419822017-05-12 18:01:32 +00001724 if (nthreads == 1) {
1725/* josh todo: hypothetical question: what do we do for OS X*? */
1726#if KMP_OS_LINUX && \
1727 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1728 void *args[argc];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001729#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001730 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1731#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1732 KMP_ARCH_AARCH64) */
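      // On the Linux targets listed above, "args" is a stack VLA; on other
      // targets the buffer is carved out with KMP_ALLOCA instead.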
Jim Cownie5e8470a2013-09-27 10:38:44 +00001733
Jonathan Peyton30419822017-05-12 18:01:32 +00001734 KA_TRACE(20,
1735 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001736
Jonathan Peyton30419822017-05-12 18:01:32 +00001737 __kmpc_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001738
Jonathan Peyton30419822017-05-12 18:01:32 +00001739 if (call_context == fork_context_intel) {
1740 /* TODO this sucks, use the compiler itself to pass args! :) */
1741 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001742#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001743 if (!ap) {
1744 // revert change made in __kmpc_serialized_parallel()
1745 master_th->th.th_serial_team->t.t_level--;
1746// Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001747
1748#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001749 void *dummy;
1750 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001751 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001752
Jonathan Peyton30419822017-05-12 18:01:32 +00001753 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001754
Joachim Protze82e94a52017-11-01 10:08:30 +00001755 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001756 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001757 &ompt_parallel_data, return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001758
Joachim Protze82e94a52017-11-01 10:08:30 +00001759 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1760          // don't use lw_taskteam after linking. Its content was swapped.
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001761
Joachim Protze82e94a52017-11-01 10:08:30 +00001762 task_info = OMPT_CUR_TASK_INFO(master_th);
1763 exit_runtime_p = &(task_info->frame.exit_runtime_frame);
1764 if (ompt_enabled.ompt_callback_implicit_task) {
1765 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1766 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1767 &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
Jonathan Peyton30419822017-05-12 18:01:32 +00001768 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001769
Jonathan Peyton30419822017-05-12 18:01:32 +00001770 /* OMPT state */
Joachim Protze82e94a52017-11-01 10:08:30 +00001771 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001772 } else {
1773 exit_runtime_p = &dummy;
1774 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001775#endif
1776
Jonathan Peyton30419822017-05-12 18:01:32 +00001777 {
1778 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1779 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1780 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1781 parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001782#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001783 ,
1784 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001785#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001786 );
1787 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001788
1789#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001790 if (ompt_enabled.enabled) {
1791          *exit_runtime_p = NULL;
1792 if (ompt_enabled.ompt_callback_implicit_task) {
1793 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1794 ompt_scope_end, NULL, &(task_info->task_data), 1,
1795 __kmp_tid_from_gtid(gtid));
Jonathan Peyton30419822017-05-12 18:01:32 +00001796 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001797
Jonathan Peyton30419822017-05-12 18:01:32 +00001798 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001799 if (ompt_enabled.ompt_callback_parallel_end) {
1800 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1801 OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
1802 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001803 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001804 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001805 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001806#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001807 } else if (microtask == (microtask_t)__kmp_teams_master) {
1808 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1809 master_th->th.th_serial_team);
1810 team = master_th->th.th_team;
1811 // team->t.t_pkfn = microtask;
1812 team->t.t_invoke = invoker;
1813 __kmp_alloc_argv_entries(argc, team, TRUE);
1814 team->t.t_argc = argc;
1815 argv = (void **)team->t.t_argv;
1816 if (ap) {
1817 for (i = argc - 1; i >= 0; --i)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001818// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001819#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001820 *argv++ = va_arg(*ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001821#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001822 *argv++ = va_arg(ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001823#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001824 } else {
1825 for (i = 0; i < argc; ++i)
1826 // Get args from parent team for teams construct
1827 argv[i] = parent_team->t.t_argv[i];
1828 }
1829 // AC: revert change made in __kmpc_serialized_parallel()
1830 // because initial code in teams should have level=0
1831 team->t.t_level--;
1832 // AC: call special invoker for outer "parallel" of teams construct
1833 {
1834 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1835 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1836 invoker(gtid);
1837 }
1838 } else {
1839#endif /* OMP_40_ENABLED */
1840 argv = args;
1841 for (i = argc - 1; i >= 0; --i)
1842// TODO: revert workaround for Intel(R) 64 tracker #96
1843#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1844 *argv++ = va_arg(*ap, void *);
1845#else
1846 *argv++ = va_arg(ap, void *);
1847#endif
1848 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001849
1850#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001851 void *dummy;
1852 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001853 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001854
Jonathan Peyton30419822017-05-12 18:01:32 +00001855 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001856
Joachim Protze82e94a52017-11-01 10:08:30 +00001857 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001858 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001859 &ompt_parallel_data, return_address);
1860 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1861          // don't use lw_taskteam after linking. Its content was swapped.
1862 task_info = OMPT_CUR_TASK_INFO(master_th);
1863 exit_runtime_p = &(task_info->frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001864
Jonathan Peyton30419822017-05-12 18:01:32 +00001865 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001866 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1867 if (ompt_enabled.ompt_callback_implicit_task) {
1868 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1869 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1870 implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
Jonathan Peyton30419822017-05-12 18:01:32 +00001871 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001872
Jonathan Peyton30419822017-05-12 18:01:32 +00001873 /* OMPT state */
Joachim Protze82e94a52017-11-01 10:08:30 +00001874 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001875 } else {
1876 exit_runtime_p = &dummy;
1877 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001878#endif
1879
Jonathan Peyton30419822017-05-12 18:01:32 +00001880 {
1881 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1882 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1883 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001884#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001885 ,
1886 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001887#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001888 );
1889 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001890
1891#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001892 if (ompt_enabled.enabled) {
1893 *exit_runtime_p = NULL;
1894 if (ompt_enabled.ompt_callback_implicit_task) {
1895 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1896 ompt_scope_end, NULL, &(task_info->task_data), 1,
1897 __kmp_tid_from_gtid(gtid));
Jonathan Peyton30419822017-05-12 18:01:32 +00001898 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001899
Joachim Protze82e94a52017-11-01 10:08:30 +00001900 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001901 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001902 if (ompt_enabled.ompt_callback_parallel_end) {
1903 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1904 &ompt_parallel_data, parent_task_data,
1905 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001906 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001907 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001908 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001909#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001910#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001912#endif /* OMP_40_ENABLED */
1913 } else if (call_context == fork_context_gnu) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001914#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001915 ompt_lw_taskteam_t lwt;
1916 __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
1917 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001918
Joachim Protze82e94a52017-11-01 10:08:30 +00001919 lwt.ompt_task_info.frame.exit_runtime_frame = NULL;
1920 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1921// don't use lw_taskteam after linking. Its content was swapped.
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001922#endif
1923
Jonathan Peyton30419822017-05-12 18:01:32 +00001924 // we were called from GNU native code
1925 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001926 return FALSE;
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001927 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001928 KMP_ASSERT2(call_context < fork_context_last,
1929 "__kmp_fork_call: unknown fork_context parameter");
1930 }
1931
1932 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
1933 KMP_MB();
1934 return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001935 }
1936
Jim Cownie5e8470a2013-09-27 10:38:44 +00001937 // GEH: only modify the executing flag in the case when not serialized
1938 // serialized case is handled in kmpc_serialized_parallel
Jonathan Peyton30419822017-05-12 18:01:32 +00001939 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
1940 "curtask=%p, curtask_max_aclevel=%d\n",
1941 parent_team->t.t_active_level, master_th,
1942 master_th->th.th_current_task,
1943 master_th->th.th_current_task->td_icvs.max_active_levels));
1944 // TODO: GEH - cannot do this assertion because root thread not set up as
1945 // executing
Jim Cownie5e8470a2013-09-27 10:38:44 +00001946 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1947 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948
1949#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001950 if (!master_th->th.th_teams_microtask || level > teams_level)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001951#endif /* OMP_40_ENABLED */
1952 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001953 /* Increment our nested depth level */
1954 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 }
1956
Jim Cownie5e8470a2013-09-27 10:38:44 +00001957 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001959 if ((level + 1 < __kmp_nested_nth.used) &&
1960 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
1961 nthreads_icv = __kmp_nested_nth.nth[level + 1];
1962 } else {
1963 nthreads_icv = 0; // don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 }
1965
1966#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001968 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jonathan Peyton30419822017-05-12 18:01:32 +00001969 kmp_proc_bind_t proc_bind_icv =
1970 proc_bind_default; // proc_bind_default means don't update
1971 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1972 proc_bind = proc_bind_false;
1973 } else {
1974 if (proc_bind == proc_bind_default) {
1975 // No proc_bind clause specified; use current proc-bind-var for this
1976 // parallel region
1977 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1978 }
1979 /* else: The proc_bind policy was specified explicitly on parallel clause.
1980 This overrides proc-bind-var for this parallel region, but does not
1981 change proc-bind-var. */
1982 // Figure the value of proc-bind-var for the child threads.
1983 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1984 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1985 master_th->th.th_current_task->td_icvs.proc_bind)) {
1986 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1987 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988 }
1989
Jim Cownie5e8470a2013-09-27 10:38:44 +00001990 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001991 master_th->th.th_set_proc_bind = proc_bind_default;
1992#endif /* OMP_40_ENABLED */
1993
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001994 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001996 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001998 ) {
1999 kmp_internal_control_t new_icvs;
2000 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2001 new_icvs.next = NULL;
2002 if (nthreads_icv > 0) {
2003 new_icvs.nproc = nthreads_icv;
2004 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005
2006#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002007 if (proc_bind_icv != proc_bind_default) {
2008 new_icvs.proc_bind = proc_bind_icv;
2009 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010#endif /* OMP_40_ENABLED */
2011
Jonathan Peyton30419822017-05-12 18:01:32 +00002012 /* allocate a new parallel team */
2013 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2014 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002015#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002016 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002017#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002018#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002019 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002020#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002021 &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002022 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002023 /* allocate a new parallel team */
2024 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2025 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002026#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002027 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002028#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002029#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002030 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002031#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002032 &master_th->th.th_current_task->td_icvs,
2033 argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002034 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002035 KF_TRACE(
2036 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002037
2038 /* setup the new team */
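  // KMP_CHECK_UPDATE only writes when the new value differs, so unchanged
  // team fields do not dirty the shared cache lines needlessly.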
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002039 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2040 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2041 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2042 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2043 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002044#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002045 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2046 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002047#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002048 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2049// TODO: parent_team->t.t_level == INT_MAX ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00002050#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002051 if (!master_th->th.th_teams_microtask || level > teams_level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002052#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002053 int new_level = parent_team->t.t_level + 1;
2054 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2055 new_level = parent_team->t.t_active_level + 1;
2056 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002057#if OMP_40_ENABLED
2058 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002059 // AC: Do not increase parallel level at start of the teams construct
2060 int new_level = parent_team->t.t_level;
2061 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2062 new_level = parent_team->t.t_active_level;
2063 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002064 }
2065#endif /* OMP_40_ENABLED */
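  // Same idea for the schedule: compare first and assign only on a real
  // change to avoid an unnecessary write to the team structure.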
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002066 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton30419822017-05-12 18:01:32 +00002067 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
2068 team->t.t_sched.chunk != new_sched.chunk)
2069 team->t.t_sched =
2070 new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002071
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002072#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002073 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002074#endif
2075
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002076 // Update the floating point rounding in the team if required.
2077 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078
Jonathan Peyton30419822017-05-12 18:01:32 +00002079 if (__kmp_tasking_mode != tskm_immediate_exec) {
2080    // Set master's task team to team's task team. Unless this is a hot team,
2081    // it should be NULL.
Jonathan Peyton30419822017-05-12 18:01:32 +00002082 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2083 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peyton30419822017-05-12 18:01:32 +00002084 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
2085 "%p, new task_team %p / team %p\n",
2086 __kmp_gtid_from_thread(master_th),
2087 master_th->th.th_task_team, parent_team,
2088 team->t.t_task_team[master_th->th.th_task_state], team));
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002089
Jonathan Peyton30419822017-05-12 18:01:32 +00002090 if (active_level || master_th->th.th_task_team) {
2091 // Take a memo of master's task_state
2092 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
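      // Grow the memo stack geometrically (doubling) when the next push would
      // overflow it.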
2093 if (master_th->th.th_task_state_top >=
2094 master_th->th.th_task_state_stack_sz) { // increase size
2095 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2096 kmp_uint8 *old_stack, *new_stack;
2097 kmp_uint32 i;
2098 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2099 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2100 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2101 }
2102 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2103 ++i) { // zero-init rest of stack
2104 new_stack[i] = 0;
2105 }
2106 old_stack = master_th->th.th_task_state_memo_stack;
2107 master_th->th.th_task_state_memo_stack = new_stack;
2108 master_th->th.th_task_state_stack_sz = new_size;
2109 __kmp_free(old_stack);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002110 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002111 // Store master's task_state on stack
2112 master_th->th
2113 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2114 master_th->th.th_task_state;
2115 master_th->th.th_task_state_top++;
2116#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton642688b2017-06-01 16:46:36 +00002117 if (team == master_th->th.th_hot_teams[active_level].hot_team) {
2118 // Restore master's nested state if nested hot team
Jonathan Peyton30419822017-05-12 18:01:32 +00002119 master_th->th.th_task_state =
2120 master_th->th
2121 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2122 } else {
2123#endif
2124 master_th->th.th_task_state = 0;
2125#if KMP_NESTED_HOT_TEAMS
2126 }
2127#endif
2128 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002129#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002130 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2131 (team == root->r.r_hot_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002132#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002133 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134
Jonathan Peyton30419822017-05-12 18:01:32 +00002135 KA_TRACE(
2136 20,
2137 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2138 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2139 team->t.t_nproc));
2140 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2141 (team->t.t_master_tid == 0 &&
2142 (team->t.t_parent == root->r.r_root_team ||
2143 team->t.t_parent->t.t_serialized)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002144 KMP_MB();
2145
2146 /* now, setup the arguments */
Jonathan Peyton30419822017-05-12 18:01:32 +00002147 argv = (void **)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002148#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002149 if (ap) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002150#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002151 for (i = argc - 1; i >= 0; --i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002152// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002153#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00002154 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002155#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002156 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002157#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002158 KMP_CHECK_UPDATE(*argv, new_argv);
2159 argv++;
2160 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002161#if OMP_40_ENABLED
2162 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002163 for (i = 0; i < argc; ++i) {
2164 // Get args from parent team for teams construct
2165 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2166 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002167 }
2168#endif /* OMP_40_ENABLED */
2169
2170 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002171 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002172 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
Jonathan Peyton30419822017-05-12 18:01:32 +00002173 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002174
Jonathan Peyton30419822017-05-12 18:01:32 +00002175 __kmp_fork_team_threads(root, team, master_th, gtid);
2176 __kmp_setup_icv_copy(team, nthreads,
2177 &master_th->th.th_current_task->td_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002178
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002179#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002180 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002181#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002182
Jonathan Peyton30419822017-05-12 18:01:32 +00002183 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002184
Jim Cownie5e8470a2013-09-27 10:38:44 +00002185#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002186 if (team->t.t_active_level == 1 // only report frames at level 1
2187#if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002188 && !master_th->th.th_teams_microtask // not in teams construct
Jonathan Peyton30419822017-05-12 18:01:32 +00002189#endif /* OMP_40_ENABLED */
2190 ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002191#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002192 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2193 (__kmp_forkjoin_frames_mode == 3 ||
2194 __kmp_forkjoin_frames_mode == 1)) {
2195 kmp_uint64 tmp_time = 0;
2196 if (__itt_get_timestamp_ptr)
2197 tmp_time = __itt_get_timestamp();
2198 // Internal fork - report frame begin
2199 master_th->th.th_frame_time = tmp_time;
2200 if (__kmp_forkjoin_frames_mode == 3)
2201 team->t.t_region_time = tmp_time;
Jonathan Peyton642688b2017-06-01 16:46:36 +00002202 } else
2203// only one notification scheme (either "submit" or "forking/joined", not both)
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002204#endif /* USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00002205 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2206 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2207 // Mark start of "parallel" region for VTune.
2208 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2209 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002210 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002211#endif /* USE_ITT_BUILD */
2212
2213 /* now go on and do the work */
Jonathan Peyton30419822017-05-12 18:01:32 +00002214 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002215 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00002216 KF_TRACE(10,
2217 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2218 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002219
2220#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002221 if (__itt_stack_caller_create_ptr) {
2222 team->t.t_stack_id =
2223 __kmp_itt_stack_caller_create(); // create new stack stitching id
2224 // before entering fork barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002225 }
2226#endif /* USE_ITT_BUILD */
2227
2228#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00002229 // AC: skip __kmp_internal_fork at teams construct, let only master
2230 // threads execute
2231 if (ap)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002232#endif /* OMP_40_ENABLED */
2233 {
Jonathan Peyton30419822017-05-12 18:01:32 +00002234 __kmp_internal_fork(loc, gtid, team);
2235 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2236 "master_th=%p, gtid=%d\n",
2237 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002238 }
2239
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002240 if (call_context == fork_context_gnu) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002241 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2242 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002243 }
2244
2245 /* Invoke microtask for MASTER thread */
Jonathan Peyton30419822017-05-12 18:01:32 +00002246 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2247 team->t.t_id, team->t.t_pkfn));
2248 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002249
Jonathan Peyton30419822017-05-12 18:01:32 +00002250 {
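    // At this point the worker threads have already been released through the
    // fork barrier (via __kmp_fork_team_threads / __kmp_internal_fork above);
    // the master executes the microtask in its own context through t_invoke.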
2251 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2252 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
2253 if (!team->t.t_invoke(gtid)) {
2254 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jim Cownie5e8470a2013-09-27 10:38:44 +00002255 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002256 }
2257 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2258 team->t.t_id, team->t.t_pkfn));
2259 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002260
Jonathan Peyton30419822017-05-12 18:01:32 +00002261 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002262
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002263#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002264 if (ompt_enabled.enabled) {
2265 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002266 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002267#endif
2268
Jonathan Peyton30419822017-05-12 18:01:32 +00002269 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002270}
2271
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002272#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002273static inline void __kmp_join_restore_state(kmp_info_t *thread,
2274 kmp_team_t *team) {
2275 // restore state outside the region
2276 thread->th.ompt_thread_info.state =
Joachim Protze82e94a52017-11-01 10:08:30 +00002277 ((team->t.t_serialized) ? omp_state_work_serial
2278 : omp_state_work_parallel);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002279}
2280
Joachim Protze82e94a52017-11-01 10:08:30 +00002281static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2282 kmp_team_t *team, ompt_data_t *parallel_data,
2283 fork_context_e fork_context, void *codeptr) {
2284 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2285 if (ompt_enabled.ompt_callback_parallel_end) {
2286 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2287 parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
2288 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002289 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002290
Jonathan Peyton30419822017-05-12 18:01:32 +00002291 task_info->frame.reenter_runtime_frame = NULL;
2292 __kmp_join_restore_state(thread, team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002293}
2294#endif
2295
Jonathan Peyton30419822017-05-12 18:01:32 +00002296void __kmp_join_call(ident_t *loc, int gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002297#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002298 ,
2299 enum fork_context_e fork_context
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002300#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002301#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002302 ,
2303 int exit_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00002304#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002305 ) {
2306 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2307 kmp_team_t *team;
2308 kmp_team_t *parent_team;
2309 kmp_info_t *master_th;
2310 kmp_root_t *root;
2311 int master_active;
2312 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002313
Jonathan Peyton30419822017-05-12 18:01:32 +00002314 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002315
Jonathan Peyton30419822017-05-12 18:01:32 +00002316 /* setup current data */
2317 master_th = __kmp_threads[gtid];
2318 root = master_th->th.th_root;
2319 team = master_th->th.th_team;
2320 parent_team = team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002321
Jonathan Peyton30419822017-05-12 18:01:32 +00002322 master_th->th.th_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002323
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002324#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002325 if (ompt_enabled.enabled) {
2326 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002327 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002328#endif
2329
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002330#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00002331 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2332 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2333 "th_task_team = %p\n",
2334 __kmp_gtid_from_thread(master_th), team,
2335 team->t.t_task_team[master_th->th.th_task_state],
2336 master_th->th.th_task_team));
2337 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2338 team->t.t_task_team[master_th->th.th_task_state]);
2339 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002340#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002341
Jonathan Peyton30419822017-05-12 18:01:32 +00002342 if (team->t.t_serialized) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002343#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002344 if (master_th->th.th_teams_microtask) {
2345 // We are in teams construct
2346 int level = team->t.t_level;
2347 int tlevel = master_th->th.th_teams_level;
2348 if (level == tlevel) {
2349 // AC: we haven't incremented it earlier at start of teams construct,
2350 // so do it here - at the end of teams construct
2351 team->t.t_level++;
2352 } else if (level == tlevel + 1) {
2353 // AC: we are exiting parallel inside teams, need to increment
2354 // serialization in order to restore it in the next call to
2355 // __kmpc_end_serialized_parallel
2356 team->t.t_serialized++;
2357 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002358 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002359#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002360 __kmpc_end_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002361
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002362#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002363 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002364 __kmp_join_restore_state(master_th, parent_team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002365 }
2366#endif
2367
Jonathan Peyton30419822017-05-12 18:01:32 +00002368 return;
2369 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002370
Jonathan Peyton30419822017-05-12 18:01:32 +00002371 master_active = team->t.t_master_active;
2372
2373#if OMP_40_ENABLED
2374 if (!exit_teams)
2375#endif /* OMP_40_ENABLED */
2376 {
2377 // AC: No barrier for internal teams at exit from teams construct.
2378 // But there is barrier for external team (league).
2379 __kmp_internal_join(loc, gtid, team);
2380 }
2381#if OMP_40_ENABLED
2382 else {
2383 master_th->th.th_task_state =
2384 0; // AC: no tasking in teams (out of any parallel)
2385 }
2386#endif /* OMP_40_ENABLED */
2387
2388 KMP_MB();
2389
2390#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002391 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2392 void *codeptr = team->t.ompt_team_info.master_return_address;
Jonathan Peyton30419822017-05-12 18:01:32 +00002393#endif
2394
2395#if USE_ITT_BUILD
2396 if (__itt_stack_caller_create_ptr) {
2397 __kmp_itt_stack_caller_destroy(
2398 (__itt_caller)team->t
2399 .t_stack_id); // destroy the stack stitching id after join barrier
2400 }
2401
2402 // Mark end of "parallel" region for VTune.
2403 if (team->t.t_active_level == 1
2404#if OMP_40_ENABLED
2405 && !master_th->th.th_teams_microtask /* not in teams construct */
2406#endif /* OMP_40_ENABLED */
2407 ) {
2408 master_th->th.th_ident = loc;
2409 // only one notification scheme (either "submit" or "forking/joined", not
2410 // both)
2411 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2412 __kmp_forkjoin_frames_mode == 3)
2413 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2414 master_th->th.th_frame_time, 0, loc,
2415 master_th->th.th_team_nproc, 1);
2416 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2417 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2418 __kmp_itt_region_joined(gtid);
2419 } // active_level == 1
2420#endif /* USE_ITT_BUILD */
2421
2422#if OMP_40_ENABLED
2423 if (master_th->th.th_teams_microtask && !exit_teams &&
2424 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2425 team->t.t_level == master_th->th.th_teams_level + 1) {
2426 // AC: We need to leave the team structure intact at the end of parallel
2427 // inside the teams construct, so that at the next parallel same (hot) team
2428 // works, only adjust nesting levels
2429
2430 /* Decrement our nested depth level */
2431 team->t.t_level--;
2432 team->t.t_active_level--;
2433 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
2434
2435 /* Restore number of threads in the team if needed */
2436 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2437 int old_num = master_th->th.th_team_nproc;
2438 int new_num = master_th->th.th_teams_size.nth;
2439 kmp_info_t **other_threads = team->t.t_threads;
2440 team->t.t_nproc = new_num;
2441 for (i = 0; i < old_num; ++i) {
2442 other_threads[i]->th.th_team_nproc = new_num;
2443 }
2444 // Adjust states of the previously unused threads of the team
2445 for (i = old_num; i < new_num; ++i) {
2446 // Re-initialize thread's barrier data.
2447 int b;
2448 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2449 for (b = 0; b < bs_last_barrier; ++b) {
2450 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2451 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2452#if USE_DEBUGGER
2453 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2454#endif
2455 }
2456 if (__kmp_tasking_mode != tskm_immediate_exec) {
2457 // Synchronize thread's task state
2458 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2459 }
2460 }
2461 }
2462
2463#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002464 if (ompt_enabled.enabled) {
2465 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2466 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002467 }
2468#endif
2469
2470 return;
2471 }
2472#endif /* OMP_40_ENABLED */
2473
2474 /* do cleanup and restore the parent team */
2475 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2476 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2477
2478 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2479
2480 /* jc: The following lock has instructions with REL and ACQ semantics,
2481 separating the parallel user code called in this parallel region
2482 from the serial user code called after this function returns. */
2483 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2484
2485#if OMP_40_ENABLED
2486 if (!master_th->th.th_teams_microtask ||
2487 team->t.t_level > master_th->th.th_teams_level)
2488#endif /* OMP_40_ENABLED */
2489 {
2490 /* Decrement our nested depth level */
2491 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
2492 }
2493 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2494
Joachim Protze82e94a52017-11-01 10:08:30 +00002495#if OMPT_SUPPORT
2496 if (ompt_enabled.enabled) {
2497 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2498 if (ompt_enabled.ompt_callback_implicit_task) {
2499 int ompt_team_size = team->t.t_nproc;
2500 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2501 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2502 __kmp_tid_from_gtid(gtid));
Jonathan Peyton30419822017-05-12 18:01:32 +00002503 }
Joachim Protze82e94a52017-11-01 10:08:30 +00002504
Jonathan Peyton30419822017-05-12 18:01:32 +00002505 task_info->frame.exit_runtime_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00002506 task_info->task_data = ompt_data_none;
Jonathan Peyton30419822017-05-12 18:01:32 +00002507 }
2508#endif
2509
2510 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2511 master_th, team));
2512 __kmp_pop_current_task_from_thread(master_th);
2513
2514#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2515 // Restore master thread's partition.
2516 master_th->th.th_first_place = team->t.t_first_place;
2517 master_th->th.th_last_place = team->t.t_last_place;
2518#endif /* OMP_40_ENABLED */
2519
2520 updateHWFPControl(team);
2521
2522 if (root->r.r_active != master_active)
2523 root->r.r_active = master_active;
2524
2525 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2526 master_th)); // this will free worker threads
2527
2528 /* This race was fun to find. Make sure the following assignments stay inside
2529 the critical region; otherwise assertions may fail occasionally, since the old
2530 team may be reallocated and the hierarchy would appear inconsistent. It is
2531 actually safe to run and won't cause any bugs, only those assertion failures.
2532 It's just one deref & assign, so it might as well live in the critical region. */
2533 master_th->th.th_team = parent_team;
2534 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2535 master_th->th.th_team_master = parent_team->t.t_threads[0];
2536 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2537
2538 /* restore serialized team, if need be */
2539 if (parent_team->t.t_serialized &&
2540 parent_team != master_th->th.th_serial_team &&
2541 parent_team != root->r.r_root_team) {
2542 __kmp_free_team(root,
2543 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2544 master_th->th.th_serial_team = parent_team;
2545 }
2546
2547 if (__kmp_tasking_mode != tskm_immediate_exec) {
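    // The master's task state was saved on th_task_state_memo_stack when the
    // nested (hot) team was forked; pop it here and re-attach the parent team's
    // task team so tasking resumes in the enclosing region. (The matching push
    // is expected to happen on the fork path.)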
2548 if (master_th->th.th_task_state_top >
2549 0) { // Restore task state from memo stack
2550 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2551 // Remember master's state if we re-use this nested hot team
2552 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2553 master_th->th.th_task_state;
2554 --master_th->th.th_task_state_top; // pop
2555 // Now restore state at this level
2556 master_th->th.th_task_state =
2557 master_th->th
2558 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2559 }
2560 // Copy the task team from the parent team to the master thread
2561 master_th->th.th_task_team =
2562 parent_team->t.t_task_team[master_th->th.th_task_state];
2563 KA_TRACE(20,
2564 ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2565 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2566 parent_team));
2567 }
2568
2569 // TODO: GEH - cannot do this assertion because root thread not set up as
2570 // executing
2571 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2572 master_th->th.th_current_task->td_flags.executing = 1;
2573
2574 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2575
2576#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002577 if (ompt_enabled.enabled) {
2578 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2579 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002580 }
2581#endif
2582
2583 KMP_MB();
2584 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2585}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002586
2587/* Check whether we should push an internal control record onto the
2588 serial team stack. If so, do it. */
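/* Roughly: a record is pushed only when the thread is running on its serial
   team at a serialization depth > 1 and no record exists yet for the current
   level; the record snapshots the current ICVs so they can be restored when
   the serialized region ends. */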
Jonathan Peyton30419822017-05-12 18:01:32 +00002589void __kmp_save_internal_controls(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002590
Jonathan Peyton30419822017-05-12 18:01:32 +00002591 if (thread->th.th_team != thread->th.th_serial_team) {
2592 return;
2593 }
2594 if (thread->th.th_team->t.t_serialized > 1) {
2595 int push = 0;
2596
2597 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2598 push = 1;
2599 } else {
2600 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2601 thread->th.th_team->t.t_serialized) {
2602 push = 1;
2603 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002604 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002605 if (push) { /* push a record on the serial team's stack */
2606 kmp_internal_control_t *control =
2607 (kmp_internal_control_t *)__kmp_allocate(
2608 sizeof(kmp_internal_control_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002609
Jonathan Peyton30419822017-05-12 18:01:32 +00002610 copy_icvs(control, &thread->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002611
Jonathan Peyton30419822017-05-12 18:01:32 +00002612 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613
Jonathan Peyton30419822017-05-12 18:01:32 +00002614 control->next = thread->th.th_team->t.t_control_stack_top;
2615 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002616 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002617 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618}
2619
2620/* Changes set_nproc */
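/* A minimal usage sketch (user code, not part of this file):

     omp_set_num_threads(2);   // reaches this routine through the API entries
     #pragma omp parallel      // the next region forks at most 2 threads
     { ... }

   Besides setting the nproc ICV, the routine eagerly trims the hot team when
   the root is inactive, so surplus workers go back to the pool now rather than
   at the next fork. */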
Jonathan Peyton30419822017-05-12 18:01:32 +00002621void __kmp_set_num_threads(int new_nth, int gtid) {
2622 kmp_info_t *thread;
2623 kmp_root_t *root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002624
Jonathan Peyton30419822017-05-12 18:01:32 +00002625 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2626 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002627
Jonathan Peyton30419822017-05-12 18:01:32 +00002628 if (new_nth < 1)
2629 new_nth = 1;
2630 else if (new_nth > __kmp_max_nth)
2631 new_nth = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002632
Jonathan Peyton30419822017-05-12 18:01:32 +00002633 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2634 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002635
Jonathan Peyton30419822017-05-12 18:01:32 +00002636 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637
Jonathan Peyton30419822017-05-12 18:01:32 +00002638 set__nproc(thread, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002639
Jonathan Peyton30419822017-05-12 18:01:32 +00002640 // If this omp_set_num_threads() call will cause the hot team size to be
2641 // reduced (in the absence of a num_threads clause), then reduce it now,
2642 // rather than waiting for the next parallel region.
2643 root = thread->th.th_root;
2644 if (__kmp_init_parallel && (!root->r.r_active) &&
2645 (root->r.r_hot_team->t.t_nproc > new_nth)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002646#if KMP_NESTED_HOT_TEAMS
2647 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2648#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002649 ) {
2650 kmp_team_t *hot_team = root->r.r_hot_team;
2651 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002652
Jonathan Peyton30419822017-05-12 18:01:32 +00002653 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002654
Jonathan Peyton30419822017-05-12 18:01:32 +00002655 // Release the extra threads we don't need any more.
2656 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2657 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2658 if (__kmp_tasking_mode != tskm_immediate_exec) {
2659 // When decreasing team size, threads no longer in the team should unref
2660 // task team.
2661 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2662 }
2663 __kmp_free_thread(hot_team->t.t_threads[f]);
2664 hot_team->t.t_threads[f] = NULL;
2665 }
2666 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002667#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002668 if (thread->th.th_hot_teams) {
2669 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2670 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2671 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002672#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002673
Jonathan Peyton30419822017-05-12 18:01:32 +00002674 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675
Jonathan Peyton30419822017-05-12 18:01:32 +00002676 // Update the t_nproc field in the threads that are still active.
2677 for (f = 0; f < new_nth; f++) {
2678 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2679 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002680 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002681 // Special flag in case omp_set_num_threads() call
2682 hot_team->t.t_size_changed = -1;
2683 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002684}
2685
Jim Cownie5e8470a2013-09-27 10:38:44 +00002686/* Changes max_active_levels */
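/* Validation summary (see the body below): a negative value is ignored with a
   warning and the previous setting is kept; a value above
   KMP_MAX_ACTIVE_LEVELS_LIMIT is clamped to that limit with a warning; anything
   in [0, KMP_MAX_ACTIVE_LEVELS_LIMIT], including zero, is accepted as-is. */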
Jonathan Peyton30419822017-05-12 18:01:32 +00002687void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2688 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002689
Jonathan Peyton30419822017-05-12 18:01:32 +00002690 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2691 "%d = (%d)\n",
2692 gtid, max_active_levels));
2693 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002694
Jonathan Peyton30419822017-05-12 18:01:32 +00002695 // validate max_active_levels
2696 if (max_active_levels < 0) {
2697 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2698 // We ignore this call if the user has specified a negative value.
2699 // The current setting won't be changed. The last valid setting will be
2700 // used. A warning will be issued (if warnings are allowed as controlled by
2701 // the KMP_WARNINGS env var).
2702 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2703 "max_active_levels for thread %d = (%d)\n",
2704 gtid, max_active_levels));
2705 return;
2706 }
2707 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2708 // it's OK, the max_active_levels is within the valid range: [ 0;
2709 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2710 // We allow a zero value. (implementation defined behavior)
2711 } else {
2712 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2713 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2714 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2715 // Current upper limit is MAX_INT. (implementation defined behavior)
2716 // If the input exceeds the upper limit, we correct the input to be the
2717 // upper limit. (implementation defined behavior)
2718 // In practice, the flow should never get here while the upper limit is MAX_INT.
2719 }
2720 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2721 "max_active_levels for thread %d = (%d)\n",
2722 gtid, max_active_levels));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002723
Jonathan Peyton30419822017-05-12 18:01:32 +00002724 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002725
Jonathan Peyton30419822017-05-12 18:01:32 +00002726 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002727
Jonathan Peyton30419822017-05-12 18:01:32 +00002728 set__max_active_levels(thread, max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002729}
2730
2731/* Gets max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002732int __kmp_get_max_active_levels(int gtid) {
2733 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002734
Jonathan Peyton30419822017-05-12 18:01:32 +00002735 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2736 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002737
Jonathan Peyton30419822017-05-12 18:01:32 +00002738 thread = __kmp_threads[gtid];
2739 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2740 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2741 "curtask_maxaclevel=%d\n",
2742 gtid, thread->th.th_current_task,
2743 thread->th.th_current_task->td_icvs.max_active_levels));
2744 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002745}
2746
2747/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
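/* The public kmp_sched_t kinds are translated to internal sched_type values
   through __kmp_sch_map; an out-of-range kind falls back to the default with a
   warning, and kmp_sched_auto or a chunk < 1 keeps KMP_DEFAULT_CHUNK.
   Hypothetical call for illustration:

     __kmp_set_schedule(gtid, kmp_sched_dynamic, 4);
     // expected to set td_icvs.sched to { kmp_sch_dynamic_chunked, 4 },
     // matching the reverse mapping in __kmp_get_schedule below. */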
Jonathan Peyton30419822017-05-12 18:01:32 +00002748void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2749 kmp_info_t *thread;
2750 // kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002751
Jonathan Peyton30419822017-05-12 18:01:32 +00002752 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2753 gtid, (int)kind, chunk));
2754 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002755
Jonathan Peyton30419822017-05-12 18:01:32 +00002756 // Check if the kind parameter is valid, correct if needed.
2757 // Valid parameters should fit in one of two intervals - standard or extended:
2758 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2759 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2760 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2761 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2762 // TODO: Hint needs attention in case we change the default schedule.
2763 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2764 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2765 __kmp_msg_null);
2766 kind = kmp_sched_default;
2767 chunk = 0; // ignore chunk value in case of bad kind
2768 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002769
Jonathan Peyton30419822017-05-12 18:01:32 +00002770 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002771
Jonathan Peyton30419822017-05-12 18:01:32 +00002772 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002773
Jonathan Peyton30419822017-05-12 18:01:32 +00002774 if (kind < kmp_sched_upper_std) {
2775 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2776 // distinguish static chunked vs. unchunked: chunk should be invalid to
2777 // indicate an unchunked schedule (which is the default)
2778 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002780 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2781 __kmp_sch_map[kind - kmp_sched_lower - 1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002782 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002783 } else {
2784 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2785 // kmp_sched_lower - 2 ];
2786 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2787 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2788 kmp_sched_lower - 2];
2789 }
Andrey Churbanovd454c732017-06-05 17:17:33 +00002790 if (kind == kmp_sched_auto || chunk < 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002791 // ignore parameter chunk for schedule auto
2792 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2793 } else {
2794 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2795 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002796}
2797
2798/* Gets def_sched_var ICV values */
Jonathan Peyton30419822017-05-12 18:01:32 +00002799void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2800 kmp_info_t *thread;
2801 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002802
Jonathan Peyton30419822017-05-12 18:01:32 +00002803 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2804 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002805
Jonathan Peyton30419822017-05-12 18:01:32 +00002806 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002807
Jonathan Peyton30419822017-05-12 18:01:32 +00002808 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002809
Jonathan Peyton30419822017-05-12 18:01:32 +00002810 switch (th_type) {
2811 case kmp_sch_static:
2812 case kmp_sch_static_greedy:
2813 case kmp_sch_static_balanced:
2814 *kind = kmp_sched_static;
2815 *chunk = 0; // chunk was not set; report zero to indicate that
2816 return;
2817 case kmp_sch_static_chunked:
2818 *kind = kmp_sched_static;
2819 break;
2820 case kmp_sch_dynamic_chunked:
2821 *kind = kmp_sched_dynamic;
2822 break;
2823 case kmp_sch_guided_chunked:
2824 case kmp_sch_guided_iterative_chunked:
2825 case kmp_sch_guided_analytical_chunked:
2826 *kind = kmp_sched_guided;
2827 break;
2828 case kmp_sch_auto:
2829 *kind = kmp_sched_auto;
2830 break;
2831 case kmp_sch_trapezoidal:
2832 *kind = kmp_sched_trapezoidal;
2833 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002834#if KMP_STATIC_STEAL_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002835 case kmp_sch_static_steal:
2836 *kind = kmp_sched_static_steal;
2837 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002838#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002839 default:
2840 KMP_FATAL(UnknownSchedulingType, th_type);
2841 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002842
Jonathan Peyton30419822017-05-12 18:01:32 +00002843 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002844}
2845
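/* Backs omp_get_ancestor_thread_num(): walk up from the innermost team toward
   the requested level, consuming each team's t_serialized count so that
   serialized nested regions are counted as levels too; the result is the
   master tid of the ancestor team (or 0 if that ancestor was serialized). */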
Jonathan Peyton30419822017-05-12 18:01:32 +00002846int __kmp_get_ancestor_thread_num(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002847
Jonathan Peyton30419822017-05-12 18:01:32 +00002848 int ii, dd;
2849 kmp_team_t *team;
2850 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002851
Jonathan Peyton30419822017-05-12 18:01:32 +00002852 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2853 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002854
Jonathan Peyton30419822017-05-12 18:01:32 +00002855 // validate level
2856 if (level == 0)
2857 return 0;
2858 if (level < 0)
2859 return -1;
2860 thr = __kmp_threads[gtid];
2861 team = thr->th.th_team;
2862 ii = team->t.t_level;
2863 if (level > ii)
2864 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002865
2866#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002867 if (thr->th.th_teams_microtask) {
2868 // AC: we are in teams region where multiple nested teams have same level
2869 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2870 if (level <=
2871 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2872 KMP_DEBUG_ASSERT(ii >= tlevel);
2873 // AC: As we need to pass by the teams league, we need to artificially
2874 // increase ii
2875 if (ii == tlevel) {
2876 ii += 2; // three teams have same level
2877 } else {
2878 ii++; // two teams have same level
2879 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002880 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002881 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002882#endif
2883
Jonathan Peyton30419822017-05-12 18:01:32 +00002884 if (ii == level)
2885 return __kmp_tid_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002886
Jonathan Peyton30419822017-05-12 18:01:32 +00002887 dd = team->t.t_serialized;
2888 level++;
2889 while (ii > level) {
2890 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002891 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002892 if ((team->t.t_serialized) && (!dd)) {
2893 team = team->t.t_parent;
2894 continue;
2895 }
2896 if (ii > level) {
2897 team = team->t.t_parent;
2898 dd = team->t.t_serialized;
2899 ii--;
2900 }
2901 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002902
Jonathan Peyton30419822017-05-12 18:01:32 +00002903 return (dd > 1) ? (0) : (team->t.t_master_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002904}
2905
Jonathan Peyton30419822017-05-12 18:01:32 +00002906int __kmp_get_team_size(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002907
Jonathan Peyton30419822017-05-12 18:01:32 +00002908 int ii, dd;
2909 kmp_team_t *team;
2910 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002911
Jonathan Peyton30419822017-05-12 18:01:32 +00002912 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
2913 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002914
Jonathan Peyton30419822017-05-12 18:01:32 +00002915 // validate level
2916 if (level == 0)
2917 return 1;
2918 if (level < 0)
2919 return -1;
2920 thr = __kmp_threads[gtid];
2921 team = thr->th.th_team;
2922 ii = team->t.t_level;
2923 if (level > ii)
2924 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002925
2926#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002927 if (thr->th.th_teams_microtask) {
2928 // AC: we are in teams region where multiple nested teams have same level
2929 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2930 if (level <=
2931 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2932 KMP_DEBUG_ASSERT(ii >= tlevel);
2933 // AC: As we need to pass by the teams league, we need to artificially
2934 // increase ii
2935 if (ii == tlevel) {
2936 ii += 2; // three teams have same level
2937 } else {
2938 ii++; // two teams have same level
2939 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002940 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002941 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002942#endif
2943
Jonathan Peyton30419822017-05-12 18:01:32 +00002944 while (ii > level) {
2945 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002946 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002947 if (team->t.t_serialized && (!dd)) {
2948 team = team->t.t_parent;
2949 continue;
2950 }
2951 if (ii > level) {
2952 team = team->t.t_parent;
2953 ii--;
2954 }
2955 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002956
Jonathan Peyton30419822017-05-12 18:01:32 +00002957 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002958}
2959
Jonathan Peyton30419822017-05-12 18:01:32 +00002960kmp_r_sched_t __kmp_get_schedule_global() {
2961 // This routine created because pairs (__kmp_sched, __kmp_chunk) and
2962 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
2963 // independently. So one can get the updated schedule here.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002964
Jonathan Peyton30419822017-05-12 18:01:32 +00002965 kmp_r_sched_t r_sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002966
Jonathan Peyton30419822017-05-12 18:01:32 +00002967 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
2968 // __kmp_guided. __kmp_sched should keep original value, so that user can set
2969 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
2970 // different roots (even in OMP 2.5)
2971 if (__kmp_sched == kmp_sch_static) {
2972 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed
2973 // schedule (balanced or greedy)
2974 } else if (__kmp_sched == kmp_sch_guided_chunked) {
2975 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed
2976 // schedule (iterative or analytical)
2977 } else {
2978 r_sched.r_sched_type =
2979 __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2980 }
2981
2982 if (__kmp_chunk < KMP_DEFAULT_CHUNK) { // __kmp_chunk may be wrong here (if it
2983 // was not ever set)
2984 r_sched.chunk = KMP_DEFAULT_CHUNK;
2985 } else {
2986 r_sched.chunk = __kmp_chunk;
2987 }
2988
2989 return r_sched;
2990}
2991
2992/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2993 at least argc number of *t_argv entries for the requested team. */
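/* Strategy: argc <= KMP_INLINE_ARGV_ENTRIES reuses the space reserved inside
   the team structure (t_inline_argv); larger counts get a page-allocated array
   of max(KMP_MIN_MALLOC_ARGV_ENTRIES, 2 * argc) entries. */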
2994static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
2995
2996 KMP_DEBUG_ASSERT(team);
2997 if (!realloc || argc > team->t.t_max_argc) {
2998
2999 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3000 "current entries=%d\n",
3001 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3002 /* if previously allocated heap space for args, free them */
3003 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3004 __kmp_free((void *)team->t.t_argv);
3005
3006 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3007 /* use unused space in the cache line for arguments */
3008 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3009 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3010 "argv entries\n",
3011 team->t.t_id, team->t.t_max_argc));
3012 team->t.t_argv = &team->t.t_inline_argv[0];
3013 if (__kmp_storage_map) {
3014 __kmp_print_storage_map_gtid(
3015 -1, &team->t.t_inline_argv[0],
3016 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3017 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3018 team->t.t_id);
3019 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003020 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00003021 /* allocate space for arguments in the heap */
3022 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3023 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3024 : 2 * argc;
3025 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3026 "argv entries\n",
3027 team->t.t_id, team->t.t_max_argc));
3028 team->t.t_argv =
3029 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3030 if (__kmp_storage_map) {
3031 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3032 &team->t.t_argv[team->t.t_max_argc],
3033 sizeof(void *) * team->t.t_max_argc,
3034 "team_%d.t_argv", team->t.t_id);
3035 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003036 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003037 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003038}
3039
Jonathan Peyton30419822017-05-12 18:01:32 +00003040static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3041 int i;
3042 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3043 team->t.t_threads =
3044 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3045 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3046 sizeof(dispatch_shared_info_t) * num_disp_buff);
3047 team->t.t_dispatch =
3048 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3049 team->t.t_implicit_task_taskdata =
3050 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3051 team->t.t_max_nproc = max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003052
Jonathan Peyton30419822017-05-12 18:01:32 +00003053 /* setup dispatch buffers */
3054 for (i = 0; i < num_disp_buff; ++i) {
3055 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003056#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003057 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003058#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003059 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003060}
3061
Jonathan Peyton30419822017-05-12 18:01:32 +00003062static void __kmp_free_team_arrays(kmp_team_t *team) {
3063 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3064 int i;
3065 for (i = 0; i < team->t.t_max_nproc; ++i) {
3066 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3067 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3068 team->t.t_dispatch[i].th_disp_buffer = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003069 }
3070 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003071 __kmp_free(team->t.t_threads);
3072 __kmp_free(team->t.t_disp_buffer);
3073 __kmp_free(team->t.t_dispatch);
3074 __kmp_free(team->t.t_implicit_task_taskdata);
3075 team->t.t_threads = NULL;
3076 team->t.t_disp_buffer = NULL;
3077 team->t.t_dispatch = NULL;
3078 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003079}
3080
Jonathan Peyton30419822017-05-12 18:01:32 +00003081static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3082 kmp_info_t **oldThreads = team->t.t_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003083
Jonathan Peyton30419822017-05-12 18:01:32 +00003084 __kmp_free(team->t.t_disp_buffer);
3085 __kmp_free(team->t.t_dispatch);
3086 __kmp_free(team->t.t_implicit_task_taskdata);
3087 __kmp_allocate_team_arrays(team, max_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003088
Jonathan Peyton30419822017-05-12 18:01:32 +00003089 KMP_MEMCPY(team->t.t_threads, oldThreads,
3090 team->t.t_nproc * sizeof(kmp_info_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003091
Jonathan Peyton30419822017-05-12 18:01:32 +00003092 __kmp_free(oldThreads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003093}
3094
Jonathan Peyton30419822017-05-12 18:01:32 +00003095static kmp_internal_control_t __kmp_get_global_icvs(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003096
Jonathan Peyton30419822017-05-12 18:01:32 +00003097 kmp_r_sched_t r_sched =
3098 __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003099
3100#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003101 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003102#endif /* OMP_40_ENABLED */
3103
Jonathan Peyton30419822017-05-12 18:01:32 +00003104 kmp_internal_control_t g_icvs = {
3105 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3106 (kmp_int8)__kmp_dflt_nested, // int nested; //internal control
3107 // for nested parallelism (per thread)
3108 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3109 // adjustment of threads (per thread)
3110 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3111 // whether blocktime is explicitly set
3112 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003113#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00003114 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3115// intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003116#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003117 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3118 // next parallel region (per thread)
3119 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3120 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3121 // for max_active_levels
3122 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3123// {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003124#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003125 __kmp_nested_proc_bind.bind_types[0],
3126 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003127#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00003128 NULL // struct kmp_internal_control *next;
3129 };
Jim Cownie5e8470a2013-09-27 10:38:44 +00003130
Jonathan Peyton30419822017-05-12 18:01:32 +00003131 return g_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003132}
3133
Jonathan Peyton30419822017-05-12 18:01:32 +00003134static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003135
Jonathan Peyton30419822017-05-12 18:01:32 +00003136 kmp_internal_control_t gx_icvs;
3137 gx_icvs.serial_nesting_level =
3138 0; // probably = team->t.t_serialized, as in __kmp_save_internal_controls
3139 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3140 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003141
Jonathan Peyton30419822017-05-12 18:01:32 +00003142 return gx_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143}
3144
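/* Sets up a fresh root: the root team (a serialized team of one representing
   the initial/serial region) and the hot team (the reusable team for this
   root's parallel regions, pre-sized to 2 * __kmp_dflt_team_nth_ub slots).
   Both are created via __kmp_allocate_team with the current global ICVs. */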
Jonathan Peyton30419822017-05-12 18:01:32 +00003145static void __kmp_initialize_root(kmp_root_t *root) {
3146 int f;
3147 kmp_team_t *root_team;
3148 kmp_team_t *hot_team;
3149 int hot_team_max_nth;
3150 kmp_r_sched_t r_sched =
3151 __kmp_get_schedule_global(); // get current state of scheduling globals
3152 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3153 KMP_DEBUG_ASSERT(root);
3154 KMP_ASSERT(!root->r.r_begin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003155
Jonathan Peyton30419822017-05-12 18:01:32 +00003156 /* setup the root state structure */
3157 __kmp_init_lock(&root->r.r_begin_lock);
3158 root->r.r_begin = FALSE;
3159 root->r.r_active = FALSE;
3160 root->r.r_in_parallel = 0;
3161 root->r.r_blocktime = __kmp_dflt_blocktime;
3162 root->r.r_nested = __kmp_dflt_nested;
Jonathan Peytonf4392462017-07-27 20:58:41 +00003163 root->r.r_cg_nthreads = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003164
Jonathan Peyton30419822017-05-12 18:01:32 +00003165 /* setup the root team for this task */
3166 /* allocate the root team structure */
3167 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003168
Jonathan Peyton30419822017-05-12 18:01:32 +00003169 root_team =
3170 __kmp_allocate_team(root,
3171 1, // new_nproc
3172 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003173#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003174 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003175#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003176#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003177 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003178#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003179 &r_icvs,
3180 0 // argc
3181 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3182 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003183#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00003184 // Non-NULL value should be assigned to make the debugger display the root
3185 // team.
3186 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003187#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003188
Jonathan Peyton30419822017-05-12 18:01:32 +00003189 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003190
Jonathan Peyton30419822017-05-12 18:01:32 +00003191 root->r.r_root_team = root_team;
3192 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003193
Jonathan Peyton30419822017-05-12 18:01:32 +00003194 /* initialize root team */
3195 root_team->t.t_threads[0] = NULL;
3196 root_team->t.t_nproc = 1;
3197 root_team->t.t_serialized = 1;
3198 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3199 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3200 root_team->t.t_sched.chunk = r_sched.chunk;
3201 KA_TRACE(
3202 20,
3203 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3204 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003205
Jonathan Peyton30419822017-05-12 18:01:32 +00003206 /* setup the hot team for this task */
3207 /* allocate the hot team structure */
3208 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003209
Jonathan Peyton30419822017-05-12 18:01:32 +00003210 hot_team =
3211 __kmp_allocate_team(root,
3212 1, // new_nproc
3213 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003214#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003215 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003216#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003217#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003218 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003219#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003220 &r_icvs,
3221 0 // argc
3222 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3223 );
3224 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003225
Jonathan Peyton30419822017-05-12 18:01:32 +00003226 root->r.r_hot_team = hot_team;
3227 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003228
Jonathan Peyton30419822017-05-12 18:01:32 +00003229 /* first-time initialization */
3230 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003231
Jonathan Peyton30419822017-05-12 18:01:32 +00003232 /* initialize hot team */
3233 hot_team_max_nth = hot_team->t.t_max_nproc;
3234 for (f = 0; f < hot_team_max_nth; ++f) {
3235 hot_team->t.t_threads[f] = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003236 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003237 hot_team->t.t_nproc = 1;
3238 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3239 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3240 hot_team->t.t_sched.chunk = r_sched.chunk;
3241 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003242}
3243
3244#ifdef KMP_DEBUG
3245
Jim Cownie5e8470a2013-09-27 10:38:44 +00003246typedef struct kmp_team_list_item {
Jonathan Peyton30419822017-05-12 18:01:32 +00003247 kmp_team_p const *entry;
3248 struct kmp_team_list_item *next;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003249} kmp_team_list_item_t;
Jonathan Peyton30419822017-05-12 18:01:32 +00003250typedef kmp_team_list_item_t *kmp_team_list_t;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003251
Jonathan Peyton30419822017-05-12 18:01:32 +00003252static void __kmp_print_structure_team_accum( // Add team to list of teams.
3253 kmp_team_list_t list, // List of teams.
3254 kmp_team_p const *team // Team to add.
3255 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003256
Jonathan Peyton30419822017-05-12 18:01:32 +00003257 // List must terminate with item where both entry and next are NULL.
3258 // Team is added to the list only once.
3259 // List is sorted in ascending order by team id.
3260 // Team id is *not* a key.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003261
Jonathan Peyton30419822017-05-12 18:01:32 +00003262 kmp_team_list_t l;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003263
Jonathan Peyton30419822017-05-12 18:01:32 +00003264 KMP_DEBUG_ASSERT(list != NULL);
3265 if (team == NULL) {
3266 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003267 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003268
Jonathan Peyton30419822017-05-12 18:01:32 +00003269 __kmp_print_structure_team_accum(list, team->t.t_parent);
3270 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003271
Jonathan Peyton30419822017-05-12 18:01:32 +00003272 // Search list for the team.
3273 l = list;
3274 while (l->next != NULL && l->entry != team) {
3275 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003276 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003277 if (l->next != NULL) {
3278 return; // Team has been added before, exit.
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003279 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003280
Jonathan Peyton30419822017-05-12 18:01:32 +00003281 // Team is not found. Search list again for insertion point.
3282 l = list;
3283 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3284 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003285 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003286
Jonathan Peyton30419822017-05-12 18:01:32 +00003287 // Insert team.
3288 {
3289 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3290 sizeof(kmp_team_list_item_t));
3291 *item = *l;
3292 l->entry = team;
3293 l->next = item;
3294 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003295}
3296
Jonathan Peyton30419822017-05-12 18:01:32 +00003297static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
Jim Cownie5e8470a2013-09-27 10:38:44 +00003298
Jonathan Peyton30419822017-05-12 18:01:32 +00003299 ) {
3300 __kmp_printf("%s", title);
3301 if (team != NULL) {
3302 __kmp_printf("%2x %p\n", team->t.t_id, team);
3303 } else {
3304 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003305 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003306}
3307
Jonathan Peyton30419822017-05-12 18:01:32 +00003308static void __kmp_print_structure_thread(char const *title,
3309 kmp_info_p const *thread) {
3310 __kmp_printf("%s", title);
3311 if (thread != NULL) {
3312 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3313 } else {
3314 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003315 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003316}
3317
Jonathan Peyton30419822017-05-12 18:01:32 +00003318void __kmp_print_structure(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003319
Jonathan Peyton30419822017-05-12 18:01:32 +00003320 kmp_team_list_t list;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003321
Jonathan Peyton30419822017-05-12 18:01:32 +00003322 // Initialize list of teams.
3323 list =
3324 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3325 list->entry = NULL;
3326 list->next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003327
Jonathan Peyton30419822017-05-12 18:01:32 +00003328 __kmp_printf("\n------------------------------\nGlobal Thread "
3329 "Table\n------------------------------\n");
3330 {
3331 int gtid;
3332 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3333 __kmp_printf("%2d", gtid);
3334 if (__kmp_threads != NULL) {
3335 __kmp_printf(" %p", __kmp_threads[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003336 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003337 if (__kmp_root != NULL) {
3338 __kmp_printf(" %p", __kmp_root[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003339 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003340 __kmp_printf("\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003341 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003342 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003343
Jonathan Peyton30419822017-05-12 18:01:32 +00003344 // Print out __kmp_threads array.
3345 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3346 "----------\n");
3347 if (__kmp_threads != NULL) {
3348 int gtid;
3349 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3350 kmp_info_t const *thread = __kmp_threads[gtid];
3351 if (thread != NULL) {
3352 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3353 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3354 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3355 __kmp_print_structure_team(" Serial Team: ",
3356 thread->th.th_serial_team);
3357 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3358 __kmp_print_structure_thread(" Master: ",
3359 thread->th.th_team_master);
3360 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3361 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003362#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003363 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003364#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003365 __kmp_print_structure_thread(" Next in pool: ",
3366 thread->th.th_next_pool);
3367 __kmp_printf("\n");
3368 __kmp_print_structure_team_accum(list, thread->th.th_team);
3369 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003370 }
3371 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003372 } else {
3373 __kmp_printf("Threads array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003374 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003375
Jonathan Peyton30419822017-05-12 18:01:32 +00003376 // Print out __kmp_root array.
3377 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3378 "--------\n");
3379 if (__kmp_root != NULL) {
3380 int gtid;
3381 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3382 kmp_root_t const *root = __kmp_root[gtid];
3383 if (root != NULL) {
3384 __kmp_printf("GTID %2d %p:\n", gtid, root);
3385 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3386 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3387 __kmp_print_structure_thread(" Uber Thread: ",
3388 root->r.r_uber_thread);
3389 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3390 __kmp_printf(" Nested?: %2d\n", root->r.r_nested);
3391 __kmp_printf(" In Parallel: %2d\n", root->r.r_in_parallel);
3392 __kmp_printf("\n");
3393 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3394 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003395 }
3396 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003397 } else {
3398 __kmp_printf("Ubers array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003399 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003400
Jonathan Peyton30419822017-05-12 18:01:32 +00003401 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3402 "--------\n");
3403 while (list->next != NULL) {
3404 kmp_team_p const *team = list->entry;
3405 int i;
3406 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3407 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3408 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
3409 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3410 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3411 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3412 for (i = 0; i < team->t.t_nproc; ++i) {
3413 __kmp_printf(" Thread %2d: ", i);
3414 __kmp_print_structure_thread("", team->t.t_threads[i]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003415 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003416 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3417 __kmp_printf("\n");
3418 list = list->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003419 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003420
Jonathan Peyton30419822017-05-12 18:01:32 +00003421 // Print out __kmp_thread_pool and __kmp_team_pool.
3422 __kmp_printf("\n------------------------------\nPools\n----------------------"
3423 "--------\n");
3424 __kmp_print_structure_thread("Thread pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003425 CCAST(kmp_info_t *, __kmp_thread_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003426 __kmp_print_structure_team("Team pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003427 CCAST(kmp_team_t *, __kmp_team_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003428 __kmp_printf("\n");
Jim Cownie5e8470a2013-09-27 10:38:44 +00003429
Jonathan Peyton30419822017-05-12 18:01:32 +00003430 // Free team list.
3431 while (list != NULL) {
3432 kmp_team_list_item_t *item = list;
3433 list = list->next;
3434 KMP_INTERNAL_FREE(item);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003435 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003436}
3437
3438#endif
3439
Jim Cownie5e8470a2013-09-27 10:38:44 +00003440//---------------------------------------------------------------------------
3441// Stuff for per-thread fast random number generator
3442// Table of primes
Jim Cownie5e8470a2013-09-27 10:38:44 +00003443static const unsigned __kmp_primes[] = {
Jonathan Peyton30419822017-05-12 18:01:32 +00003444 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3445 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3446 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3447 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3448 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3449 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3450 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3451 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3452 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3453 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3454 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
Jim Cownie5e8470a2013-09-27 10:38:44 +00003455
3456//---------------------------------------------------------------------------
3457// __kmp_get_random: Get a random number using a linear congruential method.
Jonathan Peyton30419822017-05-12 18:01:32 +00003458unsigned short __kmp_get_random(kmp_info_t *thread) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003459 unsigned x = thread->th.th_x;
Jonathan Peyton30419822017-05-12 18:01:32 +00003460 unsigned short r = x >> 16;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003461
Jonathan Peyton30419822017-05-12 18:01:32 +00003462 thread->th.th_x = x * thread->th.th_a + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003463
3464 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00003465 thread->th.th_info.ds.ds_tid, r));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003466
3467 return r;
3468}
3469//--------------------------------------------------------
3470// __kmp_init_random: Initialize a random number generator
Jonathan Peyton30419822017-05-12 18:01:32 +00003471void __kmp_init_random(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003472 unsigned seed = thread->th.th_info.ds.ds_tid;
3473
Jonathan Peyton30419822017-05-12 18:01:32 +00003474 thread->th.th_a =
3475 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3476 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3477 KA_TRACE(30,
3478 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003479}
3480
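/* A minimal usage sketch (illustrative only, not called by the runtime as
   written): each thread owns its own generator state (th_x, th_a), so draws
   need no synchronization. "thr" is assumed to be a fully constructed
   kmp_info_t whose th_info.ds.ds_tid is already set, as __kmp_init_random()
   expects:

     __kmp_init_random(thr);                    // seed th_a / th_x from the tid
     unsigned short r1 = __kmp_get_random(thr); // high 16 bits of the LCG state
     unsigned short r2 = __kmp_get_random(thr); // next value in this thread's stream
*/
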
Jim Cownie5e8470a2013-09-27 10:38:44 +00003481#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00003482/* reclaim array entries for root threads that are already dead, returns number
3483 * reclaimed */
3484static int __kmp_reclaim_dead_roots(void) {
3485 int i, r = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003486
Jonathan Peyton30419822017-05-12 18:01:32 +00003487 for (i = 0; i < __kmp_threads_capacity; ++i) {
3488 if (KMP_UBER_GTID(i) &&
3489 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3490 !__kmp_root[i]
3491 ->r.r_active) { // AC: reclaim only roots that died in non-active state
3492 r += __kmp_unregister_root_other_thread(i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003493 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003494 }
3495 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003496}
3497#endif
3498
Jonathan Peyton30419822017-05-12 18:01:32 +00003499/* This function attempts to create free entries in __kmp_threads and
3500 __kmp_root, and returns the number of free entries generated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003501
Jonathan Peyton30419822017-05-12 18:01:32 +00003502 For Windows* OS static library, the first mechanism used is to reclaim array
3503 entries for root threads that are already dead.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003504
Jonathan Peyton30419822017-05-12 18:01:32 +00003505 On all platforms, expansion is attempted on the arrays __kmp_threads and
3506 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3507 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3508 threadprivate cache array has been created. Synchronization with
3509 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003510
Jonathan Peyton30419822017-05-12 18:01:32 +00003511 After any dead root reclamation, if the clipping value allows array expansion
3512 to result in the generation of a total of nWish free slots, the function does
3513 that expansion. If not, but the clipping value allows array expansion to
3514 result in the generation of a total of nNeed free slots, the function does
3515 that expansion. Otherwise, nothing is done beyond the possible initial root
3516 thread reclamation. However, if nNeed is zero, a best-effort attempt is made
3517 to fulfil nWish as far as possible, i.e. the function will attempt to create
Jim Cownie5e8470a2013-09-27 10:38:44 +00003518 as many free slots as possible up to nWish.
3519
Jonathan Peyton30419822017-05-12 18:01:32 +00003520 If any argument is negative, the behavior is undefined. */
3521static int __kmp_expand_threads(int nWish, int nNeed) {
3522 int added = 0;
3523 int old_tp_cached;
3524 int __kmp_actual_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003525
Jonathan Peyton30419822017-05-12 18:01:32 +00003526 if (nNeed > nWish) /* normalize the arguments */
3527 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003528#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00003529 /* only for Windows static library */
3530 /* reclaim array entries for root threads that are already dead */
3531 added = __kmp_reclaim_dead_roots();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003532
Jonathan Peyton30419822017-05-12 18:01:32 +00003533 if (nNeed) {
3534 nNeed -= added;
3535 if (nNeed < 0)
3536 nNeed = 0;
3537 }
3538 if (nWish) {
3539 nWish -= added;
3540 if (nWish < 0)
3541 nWish = 0;
3542 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003543#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003544 if (nWish <= 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003545 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003546
3547 while (1) {
3548 int nTarget;
3549 int minimumRequiredCapacity;
3550 int newCapacity;
3551 kmp_info_t **newThreads;
3552 kmp_root_t **newRoot;
3553
3554 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3555 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
Jonathan Peytonf4392462017-07-27 20:58:41 +00003556 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
Jonathan Peyton30419822017-05-12 18:01:32 +00003557 // > __kmp_max_nth in one of two ways:
3558 //
3559 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3560 // may not be reused by another thread, so we may need to increase
Jonathan Peyton09244f32017-07-26 20:07:58 +00003561 // __kmp_threads_capacity to __kmp_max_nth + 1.
Jonathan Peyton30419822017-05-12 18:01:32 +00003562 //
3563 // 2) New foreign root(s) are encountered. We always register new foreign
3564 // roots. This may cause a smaller # of threads to be allocated at
3565 // subsequent parallel regions, but the worker threads hang around (and
3566 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3567 //
3568 // Anyway, that is the reason for moving the check to see if
Jonathan Peyton09244f32017-07-26 20:07:58 +00003569 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
Jonathan Peyton30419822017-05-12 18:01:32 +00003570 // instead of having it performed here. -BB
3571 old_tp_cached = __kmp_tp_cached;
3572 __kmp_actual_max_nth =
3573 old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3574 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3575
3576 /* compute expansion headroom to check if we can expand and whether to aim
3577 for nWish or nNeed */
3578 nTarget = nWish;
3579 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3580 /* can't fulfil nWish, so try nNeed */
3581 if (nNeed) {
3582 nTarget = nNeed;
3583 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3584 /* possible expansion too small -- give up */
3585 break;
3586 }
3587 } else {
3588 /* best-effort */
3589 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3590 if (!nTarget) {
3591 /* cannot expand at all -- give up */
3592 break;
3593 }
3594 }
3595 }
3596 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3597
3598 newCapacity = __kmp_threads_capacity;
3599 do {
3600 newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1)
3601 ? (newCapacity << 1)
3602 : __kmp_actual_max_nth;
3603 } while (newCapacity < minimumRequiredCapacity);
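    // Worked example (illustrative numbers only): with __kmp_threads_capacity
    // == 32, nTarget == 70 and __kmp_actual_max_nth == 1024, the loop above
    // doubles 32 -> 64 -> 128 and stops at 128, the first doubled value that
    // covers minimumRequiredCapacity (102); doubling is clipped at
    // __kmp_actual_max_nth.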
3604 newThreads = (kmp_info_t **)__kmp_allocate(
3605 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity +
3606 CACHE_LINE);
3607 newRoot = (kmp_root_t **)((char *)newThreads +
3608 sizeof(kmp_info_t *) * newCapacity);
3609 KMP_MEMCPY(newThreads, __kmp_threads,
3610 __kmp_threads_capacity * sizeof(kmp_info_t *));
3611 KMP_MEMCPY(newRoot, __kmp_root,
3612 __kmp_threads_capacity * sizeof(kmp_root_t *));
3613 memset(newThreads + __kmp_threads_capacity, 0,
3614 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t *));
3615 memset(newRoot + __kmp_threads_capacity, 0,
3616 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t *));
3617
3618 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3619 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has
3620 allocated a threadprivate cache while we were allocating the expanded
3621 array, and our new capacity is larger than the threadprivate cache
3622 capacity, so we should deallocate the expanded arrays and try again.
3623 This is the first check of a double-check pair. */
3624 __kmp_free(newThreads);
3625 continue; /* start over and try again */
3626 }
3627 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3628 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3629 /* Same check as above, but this time with the lock so we can be sure if
3630 we can succeed. */
3631 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3632 __kmp_free(newThreads);
3633 continue; /* start over and try again */
3634 } else {
3635 /* success */
3636 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be
3637 // investigated.
3638 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3639 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3640 added += newCapacity - __kmp_threads_capacity;
3641 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3642 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3643 break; /* succeeded, so we can exit the loop */
3644 }
3645 }
3646 return added;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003647}
3648
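/* Illustrative call pattern (a sketch, not an exact runtime call site): a
   caller holding __kmp_forkjoin_lock that needs at least one more slot but
   would like headroom for eight might write:

     if (__kmp_all_nth >= __kmp_threads_capacity &&
         !__kmp_expand_threads(8, 1)) {      // nWish = 8, nNeed = 1
       // not even one slot could be created: report resource exhaustion
     }

   __kmp_register_root() below uses the minimal form __kmp_expand_threads(1, 1). */
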
Jonathan Peyton30419822017-05-12 18:01:32 +00003649/* Register the current thread as a root thread and obtain our gtid. We must
3650 have the __kmp_initz_lock held at this point. Argument TRUE only if we are the
3651 thread that calls this from __kmp_do_serial_initialize(). */
3652int __kmp_register_root(int initial_thread) {
3653 kmp_info_t *root_thread;
3654 kmp_root_t *root;
3655 int gtid;
3656 int capacity;
3657 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3658 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3659 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003660
Jonathan Peyton30419822017-05-12 18:01:32 +00003661 /* 2007-03-02:
3662 If the initial thread did not invoke the OpenMP RTL yet, and this thread is not an
3663 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3664 work as expected -- it may return false (that means there is at least one
3665 empty slot in __kmp_threads array), but it is possible the only free slot
3666 is #0, which is reserved for the initial thread and so cannot be used for this
3667 one. Following code workarounds this bug.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003668
Jonathan Peyton30419822017-05-12 18:01:32 +00003669 However, the right solution seems to be not reserving slot #0 for the initial
3670 thread because:
3671 (1) there is no magic in slot #0,
3672 (2) we cannot detect the initial thread reliably (the first thread that does
3673 serial initialization may not be the real initial thread).
3674 */
3675 capacity = __kmp_threads_capacity;
3676 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3677 --capacity;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003678 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003679
Jonathan Peyton30419822017-05-12 18:01:32 +00003680 /* see if there are too many threads */
3681 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1, 1)) {
3682 if (__kmp_tp_cached) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003683 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3684 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3685 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00003686 } else {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003687 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3688 __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003689 }
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003690 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003691
3692 /* find an available thread slot */
3693 /* Don't reassign the zero slot since we need that to only be used by initial
3694 thread */
3695 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3696 gtid++)
3697 ;
3698 KA_TRACE(1,
3699 ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3700 KMP_ASSERT(gtid < __kmp_threads_capacity);
3701
3702 /* update global accounting */
3703 __kmp_all_nth++;
3704 TCW_4(__kmp_nth, __kmp_nth + 1);
3705
3706 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3707 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3708 if (__kmp_adjust_gtid_mode) {
3709 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3710 if (TCR_4(__kmp_gtid_mode) != 2) {
3711 TCW_4(__kmp_gtid_mode, 2);
3712 }
3713 } else {
3714 if (TCR_4(__kmp_gtid_mode) != 1) {
3715 TCW_4(__kmp_gtid_mode, 1);
3716 }
3717 }
3718 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003719
3720#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00003721 /* Adjust blocktime to zero if necessary */
3722 /* Middle initialization might not have occurred yet */
3723 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3724 if (__kmp_nth > __kmp_avail_proc) {
3725 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003726 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003727 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003728#endif /* KMP_ADJUST_BLOCKTIME */
3729
Jonathan Peyton30419822017-05-12 18:01:32 +00003730 /* setup this new hierarchy */
3731 if (!(root = __kmp_root[gtid])) {
3732 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3733 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3734 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003735
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003736#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003737 // Initialize stats as soon as possible (right after gtid assignment).
3738 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3739 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3740 KMP_SET_THREAD_STATE(SERIAL_REGION);
3741 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003742#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003743 __kmp_initialize_root(root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003744
Jonathan Peyton30419822017-05-12 18:01:32 +00003745 /* setup new root thread structure */
3746 if (root->r.r_uber_thread) {
3747 root_thread = root->r.r_uber_thread;
3748 } else {
3749 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3750 if (__kmp_storage_map) {
3751 __kmp_print_thread_storage_map(root_thread, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003752 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003753 root_thread->th.th_info.ds.ds_gtid = gtid;
Joachim Protze82e94a52017-11-01 10:08:30 +00003754#if OMPT_SUPPORT
3755 root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
3756#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003757 root_thread->th.th_root = root;
3758 if (__kmp_env_consistency_check) {
3759 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3760 }
3761#if USE_FAST_MEMORY
3762 __kmp_initialize_fast_memory(root_thread);
3763#endif /* USE_FAST_MEMORY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003764
Jonathan Peyton30419822017-05-12 18:01:32 +00003765#if KMP_USE_BGET
3766 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3767 __kmp_initialize_bget(root_thread);
3768#endif
3769 __kmp_init_random(root_thread); // Initialize random number generator
3770 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003771
Jonathan Peyton30419822017-05-12 18:01:32 +00003772 /* setup the serial team held in reserve by the root thread */
3773 if (!root_thread->th.th_serial_team) {
3774 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3775 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3776 root_thread->th.th_serial_team =
3777 __kmp_allocate_team(root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003778#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003779 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003780#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003781#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003782 proc_bind_default,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003783#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003784 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3785 }
3786 KMP_ASSERT(root_thread->th.th_serial_team);
3787 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3788 root_thread->th.th_serial_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003789
Jonathan Peyton30419822017-05-12 18:01:32 +00003790 /* drop root_thread into place */
3791 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003792
Jonathan Peyton30419822017-05-12 18:01:32 +00003793 root->r.r_root_team->t.t_threads[0] = root_thread;
3794 root->r.r_hot_team->t.t_threads[0] = root_thread;
3795 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3796 // AC: the team created in reserve, not for execution (it is unused for now).
3797 root_thread->th.th_serial_team->t.t_serialized = 0;
3798 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003799
Jonathan Peyton30419822017-05-12 18:01:32 +00003800 /* initialize the thread, get it ready to go */
3801 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3802 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003803
Jonathan Peyton30419822017-05-12 18:01:32 +00003804 /* prepare the master thread for get_gtid() */
3805 __kmp_gtid_set_specific(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003806
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003807#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003808 __kmp_itt_thread_name(gtid);
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003809#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003810
Jonathan Peyton30419822017-05-12 18:01:32 +00003811#ifdef KMP_TDATA_GTID
3812 __kmp_gtid = gtid;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003813#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003814 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3815 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3816
3817 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3818 "plain=%u\n",
3819 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3820 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3821 KMP_INIT_BARRIER_STATE));
3822 { // Initialize barrier data.
3823 int b;
3824 for (b = 0; b < bs_last_barrier; ++b) {
3825 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3826#if USE_DEBUGGER
3827 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3828#endif
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003829 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003830 }
3831 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3832 KMP_INIT_BARRIER_STATE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003833
Alp Toker763b9392014-02-28 09:42:41 +00003834#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00003835#if OMP_40_ENABLED
3836 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3837 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3838 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3839 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3840#endif
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003841
Jonathan Peyton30419822017-05-12 18:01:32 +00003842 if (TCR_4(__kmp_init_middle)) {
3843 __kmp_affinity_set_init_mask(gtid, TRUE);
3844 }
Alp Toker763b9392014-02-28 09:42:41 +00003845#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003846
Jonathan Peyton30419822017-05-12 18:01:32 +00003847 __kmp_root_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003848
Joachim Protze82e94a52017-11-01 10:08:30 +00003849#if OMPT_SUPPORT
3850 if (!initial_thread && ompt_enabled.enabled) {
3851
3852 ompt_thread_t *root_thread = ompt_get_thread();
3853
3854 ompt_set_thread_state(root_thread, omp_state_overhead);
3855
3856 if (ompt_enabled.ompt_callback_thread_begin) {
3857 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3858 ompt_thread_initial, __ompt_get_thread_data_internal());
3859 }
3860 ompt_data_t *task_data;
3861 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3862 if (ompt_enabled.ompt_callback_task_create) {
3863 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3864 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3865 // initial task has nothing to return to
3866 }
3867
3868 ompt_set_thread_state(root_thread, omp_state_work_serial);
3869 }
3870#endif
3871
Jonathan Peyton30419822017-05-12 18:01:32 +00003872 KMP_MB();
3873 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003874
Jonathan Peyton30419822017-05-12 18:01:32 +00003875 return gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003876}
3877
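/* Sketch of the expected calling sequence (hypothetical caller, shown only to
   illustrate the locking contract stated above): a foreign thread entering
   the runtime without a gtid would serialize on the initialization lock
   before registering itself as a new root:

     __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
     int gtid = __kmp_register_root(FALSE); // FALSE: not the serial-init thread
     __kmp_release_bootstrap_lock(&__kmp_initz_lock);

   __kmp_forkjoin_lock is acquired and released inside __kmp_register_root()
   itself. */
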
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003878#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003879static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3880 const int max_level) {
3881 int i, n, nth;
3882 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3883 if (!hot_teams || !hot_teams[level].hot_team) {
3884 return 0;
3885 }
3886 KMP_DEBUG_ASSERT(level < max_level);
3887 kmp_team_t *team = hot_teams[level].hot_team;
3888 nth = hot_teams[level].hot_team_nth;
3889 n = nth - 1; // master is not freed
3890 if (level < max_level - 1) {
3891 for (i = 0; i < nth; ++i) {
3892 kmp_info_t *th = team->t.t_threads[i];
3893 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3894 if (i > 0 && th->th.th_hot_teams) {
3895 __kmp_free(th->th.th_hot_teams);
3896 th->th.th_hot_teams = NULL;
3897 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003898 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003899 }
3900 __kmp_free_team(root, team, NULL);
3901 return n;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003902}
3903#endif
3904
Jonathan Peyton30419822017-05-12 18:01:32 +00003905// Resets a root thread and clears its root and hot teams.
3906// Returns the number of __kmp_threads entries directly and indirectly freed.
3907static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3908 kmp_team_t *root_team = root->r.r_root_team;
3909 kmp_team_t *hot_team = root->r.r_hot_team;
3910 int n = hot_team->t.t_nproc;
3911 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003912
Jonathan Peyton30419822017-05-12 18:01:32 +00003913 KMP_DEBUG_ASSERT(!root->r.r_active);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003914
Jonathan Peyton30419822017-05-12 18:01:32 +00003915 root->r.r_root_team = NULL;
3916 root->r.r_hot_team = NULL;
3917 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
3918 // before call to __kmp_free_team().
3919 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003920#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003921 if (__kmp_hot_teams_max_level >
3922 0) { // need to free nested hot teams and their threads if any
3923 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3924 kmp_info_t *th = hot_team->t.t_threads[i];
3925 if (__kmp_hot_teams_max_level > 1) {
3926 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3927 }
3928 if (th->th.th_hot_teams) {
3929 __kmp_free(th->th.th_hot_teams);
3930 th->th.th_hot_teams = NULL;
3931 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003932 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003933 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003934#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003935 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003936
Jonathan Peyton30419822017-05-12 18:01:32 +00003937 // Before we can reap the thread, we need to make certain that all other
3938 // threads in the teams that had this root as ancestor have stopped trying to
3939 // steal tasks.
3940 if (__kmp_tasking_mode != tskm_immediate_exec) {
3941 __kmp_wait_to_unref_task_teams();
3942 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003943
Jonathan Peyton30419822017-05-12 18:01:32 +00003944#if KMP_OS_WINDOWS
3945 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3946 KA_TRACE(
3947 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3948 "\n",
3949 (LPVOID) & (root->r.r_uber_thread->th),
3950 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3951 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3952#endif /* KMP_OS_WINDOWS */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003953
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003954#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003955 if (ompt_enabled.ompt_callback_thread_end) {
3956 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3957 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
Jonathan Peyton30419822017-05-12 18:01:32 +00003958 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003959#endif
3960
Jonathan Peyton30419822017-05-12 18:01:32 +00003961 TCW_4(__kmp_nth,
3962 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
Jonathan Peytonf4392462017-07-27 20:58:41 +00003963 root->r.r_cg_nthreads--;
3964
Jonathan Peyton30419822017-05-12 18:01:32 +00003965 __kmp_reap_thread(root->r.r_uber_thread, 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003966
Jonathan Peyton30419822017-05-12 18:01:32 +00003967 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it
3968 // instead of freeing it.
3969 root->r.r_uber_thread = NULL;
3970 /* mark root as no longer in use */
3971 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003972
Jonathan Peyton30419822017-05-12 18:01:32 +00003973 return n;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003974}
3975
Jonathan Peyton30419822017-05-12 18:01:32 +00003976void __kmp_unregister_root_current_thread(int gtid) {
3977 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3978 /* this lock should be ok, since unregister_root_current_thread is never
3979 called during an abort, only during a normal close. furthermore, if you
3980 have the forkjoin lock, you should never try to get the initz lock */
3981 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3982 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3983 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
3984 "exiting T#%d\n",
3985 gtid));
3986 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3987 return;
3988 }
3989 kmp_root_t *root = __kmp_root[gtid];
Jim Cownie77c2a632014-09-03 11:34:33 +00003990
Jonathan Peyton30419822017-05-12 18:01:32 +00003991 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3992 KMP_ASSERT(KMP_UBER_GTID(gtid));
3993 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3994 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003995
Jonathan Peyton30419822017-05-12 18:01:32 +00003996 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003997
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003998#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003999 kmp_info_t *thread = __kmp_threads[gtid];
4000 kmp_team_t *team = thread->th.th_team;
4001 kmp_task_team_t *task_team = thread->th.th_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00004002
Jonathan Peyton30419822017-05-12 18:01:32 +00004003 // we need to wait for the proxy tasks before finishing the thread
4004 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
Jonathan Peyton6d247f72015-09-10 21:33:50 +00004005#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00004006 // the runtime is shutting down so we won't report any events
Joachim Protze82e94a52017-11-01 10:08:30 +00004007 thread->th.ompt_thread_info.state = omp_state_undefined;
Jonathan Peyton6d247f72015-09-10 21:33:50 +00004008#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004009 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4010 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00004011#endif
4012
Jonathan Peyton30419822017-05-12 18:01:32 +00004013 __kmp_reset_root(gtid, root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004014
Jonathan Peyton30419822017-05-12 18:01:32 +00004015 /* free up this thread slot */
4016 __kmp_gtid_set_specific(KMP_GTID_DNE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004017#ifdef KMP_TDATA_GTID
Jonathan Peyton30419822017-05-12 18:01:32 +00004018 __kmp_gtid = KMP_GTID_DNE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004019#endif
4020
Jonathan Peyton30419822017-05-12 18:01:32 +00004021 KMP_MB();
4022 KC_TRACE(10,
4023 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004024
Jonathan Peyton30419822017-05-12 18:01:32 +00004025 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004026}
4027
Jonathan Peyton2321d572015-06-08 19:25:25 +00004028#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004029/* __kmp_forkjoin_lock must be already held
Jonathan Peyton30419822017-05-12 18:01:32 +00004030 Unregisters a root thread that is not the current thread. Returns the number
4031 of __kmp_threads entries freed as a result. */
4032static int __kmp_unregister_root_other_thread(int gtid) {
4033 kmp_root_t *root = __kmp_root[gtid];
4034 int r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004035
Jonathan Peyton30419822017-05-12 18:01:32 +00004036 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4037 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4038 KMP_ASSERT(KMP_UBER_GTID(gtid));
4039 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4040 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004041
Jonathan Peyton30419822017-05-12 18:01:32 +00004042 r = __kmp_reset_root(gtid, root);
4043 KC_TRACE(10,
4044 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4045 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004046}
Jonathan Peyton2321d572015-06-08 19:25:25 +00004047#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004048
Jim Cownie5e8470a2013-09-27 10:38:44 +00004049#if KMP_DEBUG
4050void __kmp_task_info() {
4051
Jonathan Peyton30419822017-05-12 18:01:32 +00004052 kmp_int32 gtid = __kmp_entry_gtid();
4053 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4054 kmp_info_t *this_thr = __kmp_threads[gtid];
4055 kmp_team_t *steam = this_thr->th.th_serial_team;
4056 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004057
Jonathan Peyton30419822017-05-12 18:01:32 +00004058 __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
4059 "ptask=%p\n",
4060 gtid, tid, this_thr, team, this_thr->th.th_current_task,
4061 team->t.t_implicit_task_taskdata[tid].td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004062}
4063#endif // KMP_DEBUG
4064
Jonathan Peyton30419822017-05-12 18:01:32 +00004065/* TODO optimize with one big memclr, take out what isn't needed, split
4066 responsibility to workers as much as possible, and delay initialization of
4067 features as much as possible */
4068static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4069 int tid, int gtid) {
4070 /* this_thr->th.th_info.ds.ds_gtid is setup in
4071 kmp_allocate_thread/create_worker.
4072 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4073 kmp_info_t *master = team->t.t_threads[0];
4074 KMP_DEBUG_ASSERT(this_thr != NULL);
4075 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4076 KMP_DEBUG_ASSERT(team);
4077 KMP_DEBUG_ASSERT(team->t.t_threads);
4078 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4079 KMP_DEBUG_ASSERT(master);
4080 KMP_DEBUG_ASSERT(master->th.th_root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004081
Jonathan Peyton30419822017-05-12 18:01:32 +00004082 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004083
Jonathan Peyton30419822017-05-12 18:01:32 +00004084 TCW_SYNC_PTR(this_thr->th.th_team, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004085
Jonathan Peyton30419822017-05-12 18:01:32 +00004086 this_thr->th.th_info.ds.ds_tid = tid;
4087 this_thr->th.th_set_nproc = 0;
4088 if (__kmp_tasking_mode != tskm_immediate_exec)
4089 // When tasking is possible, threads are not safe to reap until they are
4090 // done tasking; this will be set when tasking code is exited in wait
4091 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4092 else // no tasking --> always safe to reap
4093 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004094#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004095 this_thr->th.th_set_proc_bind = proc_bind_default;
4096#if KMP_AFFINITY_SUPPORTED
4097 this_thr->th.th_new_place = this_thr->th.th_current_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004098#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004099#endif
4100 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004101
Jonathan Peyton30419822017-05-12 18:01:32 +00004102 /* setup the thread's cache of the team structure */
4103 this_thr->th.th_team_nproc = team->t.t_nproc;
4104 this_thr->th.th_team_master = master;
4105 this_thr->th.th_team_serialized = team->t.t_serialized;
4106 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004107
Jonathan Peyton30419822017-05-12 18:01:32 +00004108 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004109
Jonathan Peyton30419822017-05-12 18:01:32 +00004110 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4111 tid, gtid, this_thr, this_thr->th.th_current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004112
Jonathan Peyton30419822017-05-12 18:01:32 +00004113 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4114 team, tid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004115
Jonathan Peyton30419822017-05-12 18:01:32 +00004116 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4117 tid, gtid, this_thr, this_thr->th.th_current_task));
4118 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4119 // __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004120
Jonathan Peyton30419822017-05-12 18:01:32 +00004121 /* TODO no worksharing in speculative threads */
4122 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004123
Jonathan Peyton30419822017-05-12 18:01:32 +00004124 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004125
Jonathan Peyton30419822017-05-12 18:01:32 +00004126 if (!this_thr->th.th_pri_common) {
4127 this_thr->th.th_pri_common =
4128 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4129 if (__kmp_storage_map) {
4130 __kmp_print_storage_map_gtid(
4131 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4132 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004133 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004134 this_thr->th.th_pri_head = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004135 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004136
Jonathan Peyton30419822017-05-12 18:01:32 +00004137 /* Initialize dynamic dispatch */
4138 {
4139 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4140 // Use team max_nproc since this will never change for the team.
4141 size_t disp_size =
4142 sizeof(dispatch_private_info_t) *
4143 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4144 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4145 team->t.t_max_nproc));
4146 KMP_ASSERT(dispatch);
4147 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4148 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004149
Jonathan Peyton30419822017-05-12 18:01:32 +00004150 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004151#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004152 dispatch->th_doacross_buf_idx = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004153#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004154 if (!dispatch->th_disp_buffer) {
4155 dispatch->th_disp_buffer =
4156 (dispatch_private_info_t *)__kmp_allocate(disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004157
Jonathan Peyton30419822017-05-12 18:01:32 +00004158 if (__kmp_storage_map) {
4159 __kmp_print_storage_map_gtid(
4160 gtid, &dispatch->th_disp_buffer[0],
4161 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4162 ? 1
4163 : __kmp_dispatch_num_buffers],
4164 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4165 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4166 gtid, team->t.t_id, gtid);
4167 }
4168 } else {
4169 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004170 }
4171
Jonathan Peyton30419822017-05-12 18:01:32 +00004172 dispatch->th_dispatch_pr_current = 0;
4173 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004174
Jonathan Peyton30419822017-05-12 18:01:32 +00004175 dispatch->th_deo_fcn = 0; /* ORDERED */
4176 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4177 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004178
Jonathan Peyton30419822017-05-12 18:01:32 +00004179 this_thr->th.th_next_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004180
Jonathan Peyton30419822017-05-12 18:01:32 +00004181 if (!this_thr->th.th_task_state_memo_stack) {
4182 size_t i;
4183 this_thr->th.th_task_state_memo_stack =
4184 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4185 this_thr->th.th_task_state_top = 0;
4186 this_thr->th.th_task_state_stack_sz = 4;
4187 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4188 ++i) // zero init the stack
4189 this_thr->th.th_task_state_memo_stack[i] = 0;
4190 }
4191
4192 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4193 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4194
4195 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004196}
4197
Jonathan Peyton30419822017-05-12 18:01:32 +00004198/* allocate a new thread for the requesting team. this is only called from
4199 within a forkjoin critical section. we will first try to get an available
4200 thread from the thread pool. if none is available, we will fork a new one
4201 assuming we are able to create a new one. this should be assured, as the
4202 caller should check on this first. */
4203kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4204 int new_tid) {
4205 kmp_team_t *serial_team;
4206 kmp_info_t *new_thr;
4207 int new_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004208
Jonathan Peyton30419822017-05-12 18:01:32 +00004209 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4210 KMP_DEBUG_ASSERT(root && team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004211#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004212 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004213#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004214 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004215
Jonathan Peyton30419822017-05-12 18:01:32 +00004216 /* first, try to get one from the thread pool */
4217 if (__kmp_thread_pool) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004218
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004219 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00004220 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4221 if (new_thr == __kmp_thread_pool_insert_pt) {
4222 __kmp_thread_pool_insert_pt = NULL;
4223 }
4224 TCW_4(new_thr->th.th_in_pool, FALSE);
4225 // Don't touch th_active_in_pool or th_active.
4226 // The worker thread adjusts those flags as it sleeps/awakens.
4227 __kmp_thread_pool_nth--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004228
Jonathan Peyton30419822017-05-12 18:01:32 +00004229 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4230 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4231 KMP_ASSERT(!new_thr->th.th_team);
4232 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4233 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004234
Jonathan Peyton30419822017-05-12 18:01:32 +00004235 /* setup the thread structure */
4236 __kmp_initialize_info(new_thr, team, new_tid,
4237 new_thr->th.th_info.ds.ds_gtid);
4238 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004239
Jonathan Peyton30419822017-05-12 18:01:32 +00004240 TCW_4(__kmp_nth, __kmp_nth + 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00004241 root->r.r_cg_nthreads++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004242
Jonathan Peyton30419822017-05-12 18:01:32 +00004243 new_thr->th.th_task_state = 0;
4244 new_thr->th.th_task_state_top = 0;
4245 new_thr->th.th_task_state_stack_sz = 4;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004246
Jim Cownie5e8470a2013-09-27 10:38:44 +00004247#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00004248 /* Adjust blocktime back to zero if necessary */
4249 /* Middle initialization might not have occurred yet */
4250 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4251 if (__kmp_nth > __kmp_avail_proc) {
4252 __kmp_zero_bt = TRUE;
4253 }
4254 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004255#endif /* KMP_ADJUST_BLOCKTIME */
4256
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004257#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004258 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4259 // KMP_BARRIER_PARENT_FLAG.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004260 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004261 kmp_balign_t *balign = new_thr->th.th_bar;
4262 for (b = 0; b < bs_last_barrier; ++b)
4263 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004264#endif
4265
Jonathan Peyton30419822017-05-12 18:01:32 +00004266 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4267 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004268
Jim Cownie5e8470a2013-09-27 10:38:44 +00004269 KMP_MB();
4270 return new_thr;
Jonathan Peyton30419822017-05-12 18:01:32 +00004271 }
4272
4273 /* no, we'll fork a new one */
4274 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4275 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4276
4277#if KMP_USE_MONITOR
4278 // If this is the first worker thread the RTL is creating, then also
4279 // launch the monitor thread. We try to do this as early as possible.
4280 if (!TCR_4(__kmp_init_monitor)) {
4281 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4282 if (!TCR_4(__kmp_init_monitor)) {
4283 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4284 TCW_4(__kmp_init_monitor, 1);
4285 __kmp_create_monitor(&__kmp_monitor);
4286 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4287#if KMP_OS_WINDOWS
4288 // AC: wait until monitor has started. This is a fix for CQ232808.
4289 // The reason is that if the library is loaded/unloaded in a loop with
4290 // small (parallel) work in between, then there is a high probability that
4291 // the monitor thread starts after the library shutdown. At shutdown it is
4292 // too late to cope with the problem, because when the master is in
4293 // DllMain (process detach) the monitor has no chances to start (it is
4294 // blocked), and master has no means to inform the monitor that the
4295 // library has gone, because all the memory which the monitor can access
4296 // is going to be released/reset.
4297 while (TCR_4(__kmp_init_monitor) < 2) {
4298 KMP_YIELD(TRUE);
4299 }
4300 KF_TRACE(10, ("after monitor thread has started\n"));
4301#endif
4302 }
4303 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4304 }
4305#endif
4306
4307 KMP_MB();
4308 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4309 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4310 }
4311
4312 /* allocate space for it. */
4313 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4314
4315 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4316
4317 if (__kmp_storage_map) {
4318 __kmp_print_thread_storage_map(new_thr, new_gtid);
4319 }
4320
4321 // add the reserve serialized team, initialized from the team's master thread
4322 {
4323 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4324 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4325 new_thr->th.th_serial_team = serial_team =
4326 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4327#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004328 ompt_data_none, // root parallel id
Jonathan Peyton30419822017-05-12 18:01:32 +00004329#endif
4330#if OMP_40_ENABLED
4331 proc_bind_default,
4332#endif
4333 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4334 }
4335 KMP_ASSERT(serial_team);
4336 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4337 // execution (it is unused for now).
4338 serial_team->t.t_threads[0] = new_thr;
4339 KF_TRACE(10,
4340 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4341 new_thr));
4342
4343 /* setup the thread structures */
4344 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4345
4346#if USE_FAST_MEMORY
4347 __kmp_initialize_fast_memory(new_thr);
4348#endif /* USE_FAST_MEMORY */
4349
4350#if KMP_USE_BGET
4351 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4352 __kmp_initialize_bget(new_thr);
4353#endif
4354
4355 __kmp_init_random(new_thr); // Initialize random number generator
4356
4357 /* Initialize these only once when thread is grabbed for a team allocation */
4358 KA_TRACE(20,
4359 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4360 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4361
4362 int b;
4363 kmp_balign_t *balign = new_thr->th.th_bar;
4364 for (b = 0; b < bs_last_barrier; ++b) {
4365 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4366 balign[b].bb.team = NULL;
4367 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4368 balign[b].bb.use_oncore_barrier = 0;
4369 }
4370
4371 new_thr->th.th_spin_here = FALSE;
4372 new_thr->th.th_next_waiting = 0;
4373
4374#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4375 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4376 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4377 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4378 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4379#endif
4380
4381 TCW_4(new_thr->th.th_in_pool, FALSE);
4382 new_thr->th.th_active_in_pool = FALSE;
4383 TCW_4(new_thr->th.th_active, TRUE);
4384
4385 /* adjust the global counters */
4386 __kmp_all_nth++;
4387 __kmp_nth++;
4388
Jonathan Peytonf4392462017-07-27 20:58:41 +00004389 root->r.r_cg_nthreads++;
4390
Jonathan Peyton30419822017-05-12 18:01:32 +00004391 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4392 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4393 if (__kmp_adjust_gtid_mode) {
4394 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4395 if (TCR_4(__kmp_gtid_mode) != 2) {
4396 TCW_4(__kmp_gtid_mode, 2);
4397 }
4398 } else {
4399 if (TCR_4(__kmp_gtid_mode) != 1) {
4400 TCW_4(__kmp_gtid_mode, 1);
4401 }
4402 }
4403 }
4404
4405#ifdef KMP_ADJUST_BLOCKTIME
4406 /* Adjust blocktime back to zero if necessary */
4407 /* Middle initialization might not have occurred yet */
4408 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4409 if (__kmp_nth > __kmp_avail_proc) {
4410 __kmp_zero_bt = TRUE;
4411 }
4412 }
4413#endif /* KMP_ADJUST_BLOCKTIME */
4414
4415 /* actually fork it and create the new worker thread */
4416 KF_TRACE(
4417 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4418 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4419 KF_TRACE(10,
4420 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4421
4422 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4423 new_gtid));
4424 KMP_MB();
4425 return new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004426}
4427
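/* Sketch of the intended use (illustrative only; the concrete call sites live
   in the team-allocation code): callers run inside the forkjoin critical
   section and are expected to have ensured capacity first, so the fork path
   cannot fail:

     kmp_info_t *w = __kmp_allocate_thread(root, team, tid); // pool hit or fork
     team->t.t_threads[tid] = w;

   A pool hit reuses an existing OS thread and skips __kmp_create_worker();
   only a brand-new worker pays the thread-creation cost. */
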
Jonathan Peyton30419822017-05-12 18:01:32 +00004428/* Reinitialize team for reuse.
4429 The hot team code calls this case at every fork barrier, so EPCC barrier
4430 test are extremely sensitive to changes in it, esp. writes to the team
4431 struct, which cause a cache invalidation in all threads.
4432 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4433static void __kmp_reinitialize_team(kmp_team_t *team,
4434 kmp_internal_control_t *new_icvs,
4435 ident_t *loc) {
4436 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4437 team->t.t_threads[0], team));
4438 KMP_DEBUG_ASSERT(team && new_icvs);
4439 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4440 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004441
Jonathan Peyton30419822017-05-12 18:01:32 +00004442 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jonathan Peyton30419822017-05-12 18:01:32 +00004443 // Copy ICVs to the master thread's implicit taskdata
4444 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4445 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004446
Jonathan Peyton30419822017-05-12 18:01:32 +00004447 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4448 team->t.t_threads[0], team));
Jim Cownie181b4bb2013-12-23 17:28:57 +00004449}
4450
Jonathan Peyton30419822017-05-12 18:01:32 +00004451/* Initialize the team data structure.
4452 This assumes the t_threads and t_max_nproc are already set.
4453 Also, we don't touch the arguments */
4454static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4455 kmp_internal_control_t *new_icvs,
4456 ident_t *loc) {
4457 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004458
Jonathan Peyton30419822017-05-12 18:01:32 +00004459 /* verify */
4460 KMP_DEBUG_ASSERT(team);
4461 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4462 KMP_DEBUG_ASSERT(team->t.t_threads);
4463 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004464
Jonathan Peyton30419822017-05-12 18:01:32 +00004465 team->t.t_master_tid = 0; /* not needed */
4466 /* team->t.t_master_bar; not needed */
4467 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4468 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004469
Jonathan Peyton30419822017-05-12 18:01:32 +00004470 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4471 team->t.t_next_pool = NULL;
4472 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4473 * up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004474
Jonathan Peyton30419822017-05-12 18:01:32 +00004475 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4476 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004477
Jonathan Peyton30419822017-05-12 18:01:32 +00004478 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4479 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004480
4481#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00004482 team->t.t_fp_control_saved = FALSE; /* not needed */
4483 team->t.t_x87_fpu_control_word = 0; /* not needed */
4484 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004485#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4486
Jonathan Peyton30419822017-05-12 18:01:32 +00004487 team->t.t_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004488
Jonathan Peyton30419822017-05-12 18:01:32 +00004489 team->t.t_ordered.dt.t_value = 0;
4490 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004491
Jonathan Peyton30419822017-05-12 18:01:32 +00004492 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004493
4494#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004495 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004496#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004497 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004498
Jonathan Peyton30419822017-05-12 18:01:32 +00004499 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004500
Jonathan Peyton30419822017-05-12 18:01:32 +00004501 __kmp_reinitialize_team(team, new_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004502
Jonathan Peyton30419822017-05-12 18:01:32 +00004503 KMP_MB();
4504 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004505}
4506
Alp Toker98758b02014-03-02 04:12:06 +00004507#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004508/* Sets full mask for thread and returns old mask, no changes to structures. */
4509static void
Jonathan Peyton30419822017-05-12 18:01:32 +00004510__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4511 if (KMP_AFFINITY_CAPABLE()) {
4512 int status;
4513 if (old_mask != NULL) {
4514 status = __kmp_get_system_affinity(old_mask, TRUE);
4515 int error = errno;
4516 if (status != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00004517 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4518 __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00004519 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004520 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004521 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4522 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004523}
4524#endif
4525
Alp Toker98758b02014-03-02 04:12:06 +00004526#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004527
Jim Cownie5e8470a2013-09-27 10:38:44 +00004528// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4529// It calculats the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004530// thread's partition, and binds each worker to a thread in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004531// The master thread's partition should already include its current binding.
Jonathan Peyton30419822017-05-12 18:01:32 +00004532static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4533 // Copy the master thread's place partion to the team struct
4534 kmp_info_t *master_th = team->t.t_threads[0];
4535 KMP_DEBUG_ASSERT(master_th != NULL);
4536 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4537 int first_place = master_th->th.th_first_place;
4538 int last_place = master_th->th.th_last_place;
4539 int masters_place = master_th->th.th_current_place;
4540 team->t.t_first_place = first_place;
4541 team->t.t_last_place = last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004542
Jonathan Peyton30419822017-05-12 18:01:32 +00004543 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4544 "bound to place %d partition = [%d,%d]\n",
4545 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4546 team->t.t_id, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004547
Jonathan Peyton30419822017-05-12 18:01:32 +00004548 switch (proc_bind) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004549
Jonathan Peyton30419822017-05-12 18:01:32 +00004550 case proc_bind_default:
4551 // serial teams might have the proc_bind policy set to proc_bind_default. It
4552 // doesn't matter, as we don't rebind the master thread for any proc_bind policy
4553 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4554 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004555
Jonathan Peyton30419822017-05-12 18:01:32 +00004556 case proc_bind_master: {
4557 int f;
4558 int n_th = team->t.t_nproc;
4559 for (f = 1; f < n_th; f++) {
4560 kmp_info_t *th = team->t.t_threads[f];
4561 KMP_DEBUG_ASSERT(th != NULL);
4562 th->th.th_first_place = first_place;
4563 th->th.th_last_place = last_place;
4564 th->th.th_new_place = masters_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004565
Jonathan Peyton30419822017-05-12 18:01:32 +00004566 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
4567 "partition = [%d,%d]\n",
4568 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4569 f, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004570 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004571 } break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004572
Jonathan Peyton30419822017-05-12 18:01:32 +00004573 case proc_bind_close: {
4574 int f;
4575 int n_th = team->t.t_nproc;
4576 int n_places;
4577 if (first_place <= last_place) {
4578 n_places = last_place - first_place + 1;
4579 } else {
4580 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4581 }
4582 if (n_th <= n_places) {
4583 int place = masters_place;
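      // Illustration (hypothetical numbers, not taken from any particular
      // machine): with 4 threads and a partition of 8 places starting at the
      // master's place p, the loop below leaves the master at p and binds
      // workers 1..3 to p+1, p+2, p+3, wrapping at the end of the partition.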
4584 for (f = 1; f < n_th; f++) {
4585 kmp_info_t *th = team->t.t_threads[f];
4586 KMP_DEBUG_ASSERT(th != NULL);
4587
4588 if (place == last_place) {
4589 place = first_place;
4590 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4591 place = 0;
4592 } else {
4593 place++;
4594 }
4595 th->th.th_first_place = first_place;
4596 th->th.th_last_place = last_place;
4597 th->th.th_new_place = place;
4598
4599 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4600 "partition = [%d,%d]\n",
4601 __kmp_gtid_from_thread(team->t.t_threads[f]),
4602 team->t.t_id, f, place, first_place, last_place));
4603 }
4604 } else {
4605 int S, rem, gap, s_count;
4606 S = n_th / n_places;
4607 s_count = 0;
4608 rem = n_th - (S * n_places);
4609 gap = rem > 0 ? n_places / rem : n_places;
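      // Worked example with hypothetical numbers: n_th = 7 threads over
      // n_places = 3 places gives S = 2, rem = 1, gap = 3, so the loop below
      // packs 3 threads into the master's place and 2 threads into each of
      // the two remaining places of the partition.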
4610 int place = masters_place;
4611 int gap_ct = gap;
4612 for (f = 0; f < n_th; f++) {
4613 kmp_info_t *th = team->t.t_threads[f];
4614 KMP_DEBUG_ASSERT(th != NULL);
4615
4616 th->th.th_first_place = first_place;
4617 th->th.th_last_place = last_place;
4618 th->th.th_new_place = place;
4619 s_count++;
4620
4621 if ((s_count == S) && rem && (gap_ct == gap)) {
4622 // do nothing, add an extra thread to place on next iteration
4623 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4624 // we added an extra thread to this place; move to next place
4625 if (place == last_place) {
4626 place = first_place;
4627 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4628 place = 0;
4629 } else {
4630 place++;
4631 }
4632 s_count = 0;
4633 gap_ct = 1;
4634 rem--;
4635 } else if (s_count == S) { // place full; don't add extra
4636 if (place == last_place) {
4637 place = first_place;
4638 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4639 place = 0;
4640 } else {
4641 place++;
4642 }
4643 gap_ct++;
4644 s_count = 0;
4645 }
4646
4647 KA_TRACE(100,
4648 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4649 "partition = [%d,%d]\n",
4650 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4651 th->th.th_new_place, first_place, last_place));
4652 }
4653 KMP_DEBUG_ASSERT(place == masters_place);
4654 }
4655 } break;
4656
4657 case proc_bind_spread: {
4658 int f;
4659 int n_th = team->t.t_nproc;
4660 int n_places;
4661 int thidx;
4662 if (first_place <= last_place) {
4663 n_places = last_place - first_place + 1;
4664 } else {
4665 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4666 }
4667 if (n_th <= n_places) {
Paul Osmialowskia0162792017-08-10 23:04:11 +00004668 int place = -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004669
Paul Osmialowskia0162792017-08-10 23:04:11 +00004670 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4671 int S = n_places / n_th;
4672 int s_count, rem, gap, gap_ct;
4673
4674 place = masters_place;
4675 rem = n_places - n_th * S;
4676 gap = rem ? n_th / rem : 1;
4677 gap_ct = gap;
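        // Worked example with hypothetical numbers: masters_place = 0 and a
        // partition of n_places = 10 places split among n_th = 4 threads
        // gives S = 2, rem = 2, gap = 2, so the loop below produces the
        // partitions [0,2], [3,4], [5,7], [8,9], with each thread bound to
        // the first place of its partition.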
4678 thidx = n_th;
4679 if (update_master_only == 1)
4680 thidx = 1;
4681 for (f = 0; f < thidx; f++) {
4682 kmp_info_t *th = team->t.t_threads[f];
4683 KMP_DEBUG_ASSERT(th != NULL);
4684
4685 th->th.th_first_place = place;
4686 th->th.th_new_place = place;
4687 s_count = 1;
4688 while (s_count < S) {
4689 if (place == last_place) {
4690 place = first_place;
4691 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4692 place = 0;
4693 } else {
4694 place++;
4695 }
4696 s_count++;
4697 }
4698 if (rem && (gap_ct == gap)) {
4699 if (place == last_place) {
4700 place = first_place;
4701 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4702 place = 0;
4703 } else {
4704 place++;
4705 }
4706 rem--;
4707 gap_ct = 0;
4708 }
4709 th->th.th_last_place = place;
4710 gap_ct++;
4711
Jonathan Peyton30419822017-05-12 18:01:32 +00004712 if (place == last_place) {
4713 place = first_place;
4714 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4715 place = 0;
4716 } else {
4717 place++;
4718 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004719
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004720 KA_TRACE(100,
4721 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4722 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4723 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4724 f, th->th.th_new_place, th->th.th_first_place,
4725 th->th.th_last_place, __kmp_affinity_num_masks));
Jonathan Peyton30419822017-05-12 18:01:32 +00004726 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004727 } else {
4728 /* Having uniform space of available computation places I can create
4729 T partitions of round(P/T) size and put threads into the first
4730 place of each partition. */
4731 double current = static_cast<double>(masters_place);
4732 double spacing =
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004733 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
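        // Worked example with hypothetical numbers: masters_place = 0,
        // n_places = 8, n_th = 4 gives spacing = 2.25, so the loop below
        // yields the partitions [0,1], [2,3], [4,5], [6,7], with each thread
        // bound to the first place of its partition.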
Paul Osmialowskia0162792017-08-10 23:04:11 +00004734 int first, last;
4735 kmp_info_t *th;
4736
4737 thidx = n_th + 1;
4738 if (update_master_only == 1)
4739 thidx = 1;
4740 for (f = 0; f < thidx; f++) {
4741 first = static_cast<int>(current);
4742 last = static_cast<int>(current + spacing) - 1;
4743 KMP_DEBUG_ASSERT(last >= first);
4744 if (first >= n_places) {
4745 if (masters_place) {
4746 first -= n_places;
4747 last -= n_places;
4748 if (first == (masters_place + 1)) {
4749 KMP_DEBUG_ASSERT(f == n_th);
4750 first--;
4751 }
4752 if (last == masters_place) {
4753 KMP_DEBUG_ASSERT(f == (n_th - 1));
4754 last--;
4755 }
4756 } else {
4757 KMP_DEBUG_ASSERT(f == n_th);
4758 first = 0;
4759 last = 0;
4760 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004761 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004762 if (last >= n_places) {
4763 last = (n_places - 1);
4764 }
4765 place = first;
4766 current += spacing;
4767 if (f < n_th) {
4768 KMP_DEBUG_ASSERT(0 <= first);
4769 KMP_DEBUG_ASSERT(n_places > first);
4770 KMP_DEBUG_ASSERT(0 <= last);
4771 KMP_DEBUG_ASSERT(n_places > last);
4772 KMP_DEBUG_ASSERT(last_place >= first_place);
4773 th = team->t.t_threads[f];
4774 KMP_DEBUG_ASSERT(th);
4775 th->th.th_first_place = first;
4776 th->th.th_new_place = place;
4777 th->th.th_last_place = last;
Jonathan Peyton30419822017-05-12 18:01:32 +00004778
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004779 KA_TRACE(100,
4780 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4781 "partition = [%d,%d], spacing = %.4f\n",
4782 __kmp_gtid_from_thread(team->t.t_threads[f]),
4783 team->t.t_id, f, th->th.th_new_place,
4784 th->th.th_first_place, th->th.th_last_place, spacing));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004785 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004786 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004787 }
4788 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4789 } else {
4790 int S, rem, gap, s_count;
4791 S = n_th / n_places;
4792 s_count = 0;
4793 rem = n_th - (S * n_places);
4794 gap = rem > 0 ? n_places / rem : n_places;
4795 int place = masters_place;
4796 int gap_ct = gap;
4797 thidx = n_th;
4798 if (update_master_only == 1)
4799 thidx = 1;
4800 for (f = 0; f < thidx; f++) {
4801 kmp_info_t *th = team->t.t_threads[f];
4802 KMP_DEBUG_ASSERT(th != NULL);
4803
4804 th->th.th_first_place = place;
4805 th->th.th_last_place = place;
4806 th->th.th_new_place = place;
4807 s_count++;
4808
4809 if ((s_count == S) && rem && (gap_ct == gap)) {
4810 // do nothing, add an extra thread to place on next iteration
4811 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4812 // we added an extra thread to this place; move on to next place
4813 if (place == last_place) {
4814 place = first_place;
4815 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4816 place = 0;
4817 } else {
4818 place++;
4819 }
4820 s_count = 0;
4821 gap_ct = 1;
4822 rem--;
4823 } else if (s_count == S) { // place is full; don't add extra thread
4824 if (place == last_place) {
4825 place = first_place;
4826 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4827 place = 0;
4828 } else {
4829 place++;
4830 }
4831 gap_ct++;
4832 s_count = 0;
4833 }
4834
4835 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4836 "partition = [%d,%d]\n",
4837 __kmp_gtid_from_thread(team->t.t_threads[f]),
4838 team->t.t_id, f, th->th.th_new_place,
4839 th->th.th_first_place, th->th.th_last_place));
4840 }
4841 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4842 }
4843 } break;
4844
4845 default:
4846 break;
4847 }
4848
4849 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004850}
4851
Alp Toker98758b02014-03-02 04:12:06 +00004852#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004853
Jonathan Peyton30419822017-05-12 18:01:32 +00004854/* allocate a new team data structure to use. take one off of the free pool if
4855 available */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004856kmp_team_t *
Jonathan Peyton30419822017-05-12 18:01:32 +00004857__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004858#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004859 ompt_data_t ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004860#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004861#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004862 kmp_proc_bind_t new_proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00004863#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004864 kmp_internal_control_t *new_icvs,
4865 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4866 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4867 int f;
4868 kmp_team_t *team;
4869 int use_hot_team = !root->r.r_active;
4870 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004871
Jonathan Peyton30419822017-05-12 18:01:32 +00004872 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4873 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4874 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4875 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004876
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004877#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004878 kmp_hot_team_ptr_t *hot_teams;
4879 if (master) {
4880 team = master->th.th_team;
4881 level = team->t.t_active_level;
4882 if (master->th.th_teams_microtask) { // in teams construct?
4883 if (master->th.th_teams_size.nteams > 1 &&
4884 ( // #teams > 1
4885 team->t.t_pkfn ==
4886 (microtask_t)__kmp_teams_master || // inner fork of the teams
4887 master->th.th_teams_level <
4888 team->t.t_level)) { // or nested parallel inside the teams
4889 ++level; // not increment if #teams==1, or for outer fork of the teams;
4890 ++level; // don't increment if #teams==1 or for the outer fork of the
4891 // teams; increment otherwise
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004892 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004893 hot_teams = master->th.th_hot_teams;
4894 if (level < __kmp_hot_teams_max_level && hot_teams &&
4895 hot_teams[level]
4896 .hot_team) { // hot team has already been allocated for given level
4897 use_hot_team = 1;
4898 } else {
4899 use_hot_team = 0;
4900 }
4901 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004902#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004903 // Optimization to use a "hot" team
4904 if (use_hot_team && new_nproc > 1) {
4905 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004906#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004907 team = hot_teams[level].hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004908#else
Jonathan Peyton30419822017-05-12 18:01:32 +00004909 team = root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004910#endif
4911#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004912 if (__kmp_tasking_mode != tskm_immediate_exec) {
4913 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
4914 "task_team[1] = %p before reinit\n",
4915 team->t.t_task_team[0], team->t.t_task_team[1]));
4916 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004917#endif
4918
Jonathan Peyton30419822017-05-12 18:01:32 +00004919 // Has the number of threads changed?
4920 /* Let's assume the most common case is that the number of threads is
4921 unchanged, and put that case first. */
4922 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4923 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
4924 // This case can mean that omp_set_num_threads() was called and the hot
Jonathan Peyton642688b2017-06-01 16:46:36 +00004925 // team size was already reduced, so we check the special flag
Jonathan Peyton30419822017-05-12 18:01:32 +00004926 if (team->t.t_size_changed == -1) {
4927 team->t.t_size_changed = 1;
4928 } else {
4929 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4930 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004931
Jonathan Peyton30419822017-05-12 18:01:32 +00004932 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4933 kmp_r_sched_t new_sched = new_icvs->sched;
4934 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4935 team->t.t_sched.chunk != new_sched.chunk)
4936 team->t.t_sched =
4937 new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004938
Jonathan Peyton30419822017-05-12 18:01:32 +00004939 __kmp_reinitialize_team(team, new_icvs,
4940 root->r.r_uber_thread->th.th_ident);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004941
Jonathan Peyton30419822017-05-12 18:01:32 +00004942 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4943 team->t.t_threads[0], team));
4944 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004945
4946#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004947#if KMP_AFFINITY_SUPPORTED
4948 if ((team->t.t_size_changed == 0) &&
4949 (team->t.t_proc_bind == new_proc_bind)) {
4950 if (new_proc_bind == proc_bind_spread) {
4951 __kmp_partition_places(
4952 team, 1); // add flag to update only master for spread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004953 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004954 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
4955 "proc_bind = %d, partition = [%d,%d]\n",
4956 team->t.t_id, new_proc_bind, team->t.t_first_place,
4957 team->t.t_last_place));
4958 } else {
4959 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4960 __kmp_partition_places(team);
4961 }
4962#else
4963 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4964#endif /* KMP_AFFINITY_SUPPORTED */
4965#endif /* OMP_40_ENABLED */
4966 } else if (team->t.t_nproc > new_nproc) {
4967 KA_TRACE(20,
4968 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
4969 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004970
Jonathan Peyton30419822017-05-12 18:01:32 +00004971 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004972#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004973 if (__kmp_hot_teams_mode == 0) {
4974 // AC: saved number of threads should correspond to team's value in this
4975 // mode, can be bigger in mode 1, when hot team has threads in reserve
4976 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4977 hot_teams[level].hot_team_nth = new_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004978#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004979 /* release the extra threads we don't need any more */
4980 for (f = new_nproc; f < team->t.t_nproc; f++) {
4981 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4982 if (__kmp_tasking_mode != tskm_immediate_exec) {
4983 // When decreasing team size, threads no longer in the team should
4984 // unref task team.
4985 team->t.t_threads[f]->th.th_task_team = NULL;
4986 }
4987 __kmp_free_thread(team->t.t_threads[f]);
4988 team->t.t_threads[f] = NULL;
4989 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004990#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004991 } // (__kmp_hot_teams_mode == 0)
4992 else {
4993 // When keeping extra threads in team, switch threads to wait on own
4994 // b_go flag
4995 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4996 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4997 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4998 for (int b = 0; b < bs_last_barrier; ++b) {
4999 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5000 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005001 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005002 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5003 }
5004 }
5005 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005006#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005007 team->t.t_nproc = new_nproc;
5008 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5009 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
5010 team->t.t_sched.chunk != new_icvs->sched.chunk)
5011 team->t.t_sched = new_icvs->sched;
5012 __kmp_reinitialize_team(team, new_icvs,
5013 root->r.r_uber_thread->th.th_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005014
Jonathan Peyton30419822017-05-12 18:01:32 +00005015 /* update the remaining threads */
5016 for (f = 0; f < new_nproc; ++f) {
5017 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5018 }
5019 // restore the current task state of the master thread: should be the
5020 // implicit task
5021 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5022 team->t.t_threads[0], team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005023
Jonathan Peyton30419822017-05-12 18:01:32 +00005024 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005025
5026#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00005027 for (f = 0; f < team->t.t_nproc; f++) {
5028 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5029 team->t.t_threads[f]->th.th_team_nproc ==
5030 team->t.t_nproc);
5031 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005032#endif
5033
5034#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005035 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5036#if KMP_AFFINITY_SUPPORTED
5037 __kmp_partition_places(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005038#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005039#endif
5040 } else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00005041#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005042 kmp_affin_mask_t *old_mask;
5043 if (KMP_AFFINITY_CAPABLE()) {
5044 KMP_CPU_ALLOC(old_mask);
5045 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005046#endif
5047
Jonathan Peyton30419822017-05-12 18:01:32 +00005048 KA_TRACE(20,
5049 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5050 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005051
Jonathan Peyton30419822017-05-12 18:01:32 +00005052 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005053
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005054#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005055 int avail_threads = hot_teams[level].hot_team_nth;
5056 if (new_nproc < avail_threads)
5057 avail_threads = new_nproc;
5058 kmp_info_t **other_threads = team->t.t_threads;
5059 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5060 // Adjust barrier data of reserved threads (if any) of the team
5061 // Other data will be set in __kmp_initialize_info() below.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005062 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00005063 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5064 for (b = 0; b < bs_last_barrier; ++b) {
5065 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5066 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005067#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00005068 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005069#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005070 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005071 }
5072 if (hot_teams[level].hot_team_nth >= new_nproc) {
5073 // we have all needed threads in reserve, no need to allocate any
5074 // this is only possible in mode 1; there cannot be reserved threads in mode 0
5075 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5076 team->t.t_nproc = new_nproc; // just get reserved threads involved
5077 } else {
5078 // we may have some threads in reserve, but not enough
5079 team->t.t_nproc =
5080 hot_teams[level]
5081 .hot_team_nth; // get reserved threads involved if any
5082 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5083#endif // KMP_NESTED_HOT_TEAMS
5084 if (team->t.t_max_nproc < new_nproc) {
5085 /* reallocate larger arrays */
5086 __kmp_reallocate_team_arrays(team, new_nproc);
5087 __kmp_reinitialize_team(team, new_icvs, NULL);
5088 }
5089
5090#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5091 /* Temporarily set full mask for master thread before creation of
5092 workers. The reason is that workers inherit their affinity from the
5093 master, so if a lot of workers are created on a single core quickly, they
5094 don't get a chance to set their own affinity for a long time. */
5095 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5096#endif
5097
5098 /* allocate new threads for the hot team */
5099 for (f = team->t.t_nproc; f < new_nproc; f++) {
5100 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5101 KMP_DEBUG_ASSERT(new_worker);
5102 team->t.t_threads[f] = new_worker;
5103
5104 KA_TRACE(20,
5105 ("__kmp_allocate_team: team %d init T#%d arrived: "
5106 "join=%llu, plain=%llu\n",
5107 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5108 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5109 team->t.t_bar[bs_plain_barrier].b_arrived));
5110
5111 { // Initialize barrier data for new threads.
5112 int b;
5113 kmp_balign_t *balign = new_worker->th.th_bar;
5114 for (b = 0; b < bs_last_barrier; ++b) {
5115 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5116 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5117 KMP_BARRIER_PARENT_FLAG);
5118#if USE_DEBUGGER
5119 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5120#endif
5121 }
5122 }
5123 }
5124
5125#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5126 if (KMP_AFFINITY_CAPABLE()) {
5127 /* Restore initial master thread's affinity mask */
5128 __kmp_set_system_affinity(old_mask, TRUE);
5129 KMP_CPU_FREE(old_mask);
5130 }
5131#endif
5132#if KMP_NESTED_HOT_TEAMS
5133 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5134#endif // KMP_NESTED_HOT_TEAMS
5135 /* make sure everyone is synchronized */
5136 int old_nproc = team->t.t_nproc; // save old value and use to update only
5137 // new threads below
5138 __kmp_initialize_team(team, new_nproc, new_icvs,
5139 root->r.r_uber_thread->th.th_ident);
5140
5141 /* reinitialize the threads */
5142 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5143 for (f = 0; f < team->t.t_nproc; ++f)
5144 __kmp_initialize_info(team->t.t_threads[f], team, f,
5145 __kmp_gtid_from_tid(f, team));
5146 if (level) { // set th_task_state for new threads in nested hot team
5147 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5148 // only need to set the th_task_state for the new threads. th_task_state
5149 // for master thread will not be accurate until after this in
5150 // __kmp_fork_call(), so we look to the master's memo_stack to get the
5151 // correct value.
5152 for (f = old_nproc; f < team->t.t_nproc; ++f)
5153 team->t.t_threads[f]->th.th_task_state =
5154 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5155 } else { // set th_task_state for new threads in non-nested hot team
5156 int old_state =
5157 team->t.t_threads[0]->th.th_task_state; // copy master's state
5158 for (f = old_nproc; f < team->t.t_nproc; ++f)
5159 team->t.t_threads[f]->th.th_task_state = old_state;
5160 }
5161
5162#ifdef KMP_DEBUG
5163 for (f = 0; f < team->t.t_nproc; ++f) {
5164 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5165 team->t.t_threads[f]->th.th_team_nproc ==
5166 team->t.t_nproc);
5167 }
5168#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005169
5170#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005171 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5172#if KMP_AFFINITY_SUPPORTED
5173 __kmp_partition_places(team);
5174#endif
5175#endif
5176 } // Check changes in number of threads
5177
5178#if OMP_40_ENABLED
5179 kmp_info_t *master = team->t.t_threads[0];
5180 if (master->th.th_teams_microtask) {
5181 for (f = 1; f < new_nproc; ++f) {
5182 // propagate teams construct specific info to workers
5183 kmp_info_t *thr = team->t.t_threads[f];
5184 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5185 thr->th.th_teams_level = master->th.th_teams_level;
5186 thr->th.th_teams_size = master->th.th_teams_size;
5187 }
5188 }
5189#endif /* OMP_40_ENABLED */
5190#if KMP_NESTED_HOT_TEAMS
5191 if (level) {
5192 // Sync barrier state for nested hot teams, not needed for outermost hot
5193 // team.
5194 for (f = 1; f < new_nproc; ++f) {
5195 kmp_info_t *thr = team->t.t_threads[f];
5196 int b;
5197 kmp_balign_t *balign = thr->th.th_bar;
5198 for (b = 0; b < bs_last_barrier; ++b) {
5199 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5200 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5201#if USE_DEBUGGER
5202 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5203#endif
5204 }
5205 }
5206 }
5207#endif // KMP_NESTED_HOT_TEAMS
5208
5209 /* reallocate space for arguments if necessary */
5210 __kmp_alloc_argv_entries(argc, team, TRUE);
5211 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5212 // The hot team re-uses the previous task team,
5213 // if untouched during the previous release->gather phase.
5214
5215 KF_TRACE(10, (" hot_team = %p\n", team));
5216
5217#if KMP_DEBUG
5218 if (__kmp_tasking_mode != tskm_immediate_exec) {
5219 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5220 "task_team[1] = %p after reinit\n",
5221 team->t.t_task_team[0], team->t.t_task_team[1]));
5222 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005223#endif
5224
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005225#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005226 __ompt_team_assign_id(team, ompt_parallel_data);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005227#endif
5228
Jim Cownie5e8470a2013-09-27 10:38:44 +00005229 KMP_MB();
5230
Jim Cownie5e8470a2013-09-27 10:38:44 +00005231 return team;
Jonathan Peyton30419822017-05-12 18:01:32 +00005232 }
5233
5234 /* next, let's try to take one from the team pool */
5235 KMP_MB();
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005236 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005237 /* TODO: consider resizing undersized teams instead of reaping them, now
5238 that we have a resizing mechanism */
5239 if (team->t.t_max_nproc >= max_nproc) {
5240 /* take this team from the team pool */
5241 __kmp_team_pool = team->t.t_next_pool;
5242
5243 /* setup the team for fresh use */
5244 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5245
5246 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5247 "task_team[1] %p to NULL\n",
5248 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5249 team->t.t_task_team[0] = NULL;
5250 team->t.t_task_team[1] = NULL;
5251
5252 /* reallocate space for arguments if necessary */
5253 __kmp_alloc_argv_entries(argc, team, TRUE);
5254 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5255
5256 KA_TRACE(
5257 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5258 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5259 { // Initialize barrier data.
5260 int b;
5261 for (b = 0; b < bs_last_barrier; ++b) {
5262 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5263#if USE_DEBUGGER
5264 team->t.t_bar[b].b_master_arrived = 0;
5265 team->t.t_bar[b].b_team_arrived = 0;
5266#endif
5267 }
5268 }
5269
5270#if OMP_40_ENABLED
5271 team->t.t_proc_bind = new_proc_bind;
5272#endif
5273
5274 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5275 team->t.t_id));
5276
5277#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005278 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005279#endif
5280
5281 KMP_MB();
5282
5283 return team;
5284 }
5285
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005286 /* reap team if it is too small, then loop back and check the next one */
5287 // not sure if this is wise, but it will be redone during the hot-teams
5288 // rewrite.
5289 /* TODO: Use technique to find the right size hot-team, don't reap them */
Jonathan Peyton30419822017-05-12 18:01:32 +00005290 team = __kmp_reap_team(team);
5291 __kmp_team_pool = team;
5292 }
5293
5294 /* nothing available in the pool, no matter, make a new team! */
5295 KMP_MB();
5296 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5297
5298 /* and set it up */
5299 team->t.t_max_nproc = max_nproc;
5300 /* NOTE well, for some reason allocating one big buffer and dividing it up
5301 seems to really hurt performance a lot on the P4, so let's not use this */
5302 __kmp_allocate_team_arrays(team, max_nproc);
5303
5304 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5305 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5306
5307 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5308 "%p to NULL\n",
5309 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5310 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5311 // memory, no need to duplicate
5312 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5313 // memory, no need to duplicate
5314
5315 if (__kmp_storage_map) {
5316 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5317 }
5318
5319 /* allocate space for arguments */
5320 __kmp_alloc_argv_entries(argc, team, FALSE);
5321 team->t.t_argc = argc;
5322
5323 KA_TRACE(20,
5324 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5325 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5326 { // Initialize barrier data.
5327 int b;
5328 for (b = 0; b < bs_last_barrier; ++b) {
5329 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5330#if USE_DEBUGGER
5331 team->t.t_bar[b].b_master_arrived = 0;
5332 team->t.t_bar[b].b_team_arrived = 0;
5333#endif
5334 }
5335 }
5336
5337#if OMP_40_ENABLED
5338 team->t.t_proc_bind = new_proc_bind;
5339#endif
5340
5341#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005342 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005343 team->t.ompt_serialized_team_info = NULL;
5344#endif
5345
5346 KMP_MB();
5347
5348 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5349 team->t.t_id));
5350
5351 return team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005352}
5353
5354/* TODO implement hot-teams at all levels */
5355/* TODO implement lazy thread release on demand (disband request) */
5356
5357/* free the team. return it to the team pool. release all the threads
5358 * associated with it */
Jonathan Peyton30419822017-05-12 18:01:32 +00005359void __kmp_free_team(kmp_root_t *root,
5360 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5361 int f;
5362 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5363 team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005364
Jonathan Peyton30419822017-05-12 18:01:32 +00005365 /* verify state */
5366 KMP_DEBUG_ASSERT(root);
5367 KMP_DEBUG_ASSERT(team);
5368 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5369 KMP_DEBUG_ASSERT(team->t.t_threads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005370
Jonathan Peyton30419822017-05-12 18:01:32 +00005371 int use_hot_team = team == root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005372#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005373 int level;
5374 kmp_hot_team_ptr_t *hot_teams;
5375 if (master) {
5376 level = team->t.t_active_level - 1;
5377 if (master->th.th_teams_microtask) { // in teams construct?
5378 if (master->th.th_teams_size.nteams > 1) {
5379 ++level; // level was not increased in teams construct for
5380 // team_of_masters
5381 }
5382 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5383 master->th.th_teams_level == team->t.t_level) {
5384 ++level; // level was not increased in teams construct for
5385 // team_of_workers before the parallel
5386 } // team->t.t_level will be increased inside parallel
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005387 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005388 hot_teams = master->th.th_hot_teams;
5389 if (level < __kmp_hot_teams_max_level) {
5390 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5391 use_hot_team = 1;
5392 }
5393 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005394#endif // KMP_NESTED_HOT_TEAMS
5395
Jonathan Peyton30419822017-05-12 18:01:32 +00005396 /* team is done working */
5397 TCW_SYNC_PTR(team->t.t_pkfn,
5398 NULL); // Important for Debugging Support Library.
5399 team->t.t_copyin_counter = 0; // init counter for possible reuse
5400 // Do not reset pointer to parent team to NULL for hot teams.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005401
Jonathan Peyton30419822017-05-12 18:01:32 +00005402 /* if we are non-hot team, release our threads */
5403 if (!use_hot_team) {
5404 if (__kmp_tasking_mode != tskm_immediate_exec) {
5405 // Wait for threads to reach reapable state
5406 for (f = 1; f < team->t.t_nproc; ++f) {
5407 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5408 kmp_info_t *th = team->t.t_threads[f];
5409 volatile kmp_uint32 *state = &th->th.th_reap_state;
5410 while (*state != KMP_SAFE_TO_REAP) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005411#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005412 // On Windows a thread can be killed at any time, check this
5413 DWORD ecode;
5414 if (!__kmp_is_thread_alive(th, &ecode)) {
5415 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5416 break;
5417 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005418#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005419 // first check if thread is sleeping
5420 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5421 if (fl.is_sleeping())
5422 fl.resume(__kmp_gtid_from_thread(th));
5423 KMP_CPU_PAUSE();
5424 }
5425 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005426
Jonathan Peyton30419822017-05-12 18:01:32 +00005427 // Delete task teams
5428 int tt_idx;
5429 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5430 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5431 if (task_team != NULL) {
5432 for (f = 0; f < team->t.t_nproc;
5433 ++f) { // Have all threads unref task teams
5434 team->t.t_threads[f]->th.th_task_team = NULL;
5435 }
5436 KA_TRACE(
5437 20,
5438 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5439 __kmp_get_gtid(), task_team, team->t.t_id));
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005440#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005441 __kmp_free_task_team(master, task_team);
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005442#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005443 team->t.t_task_team[tt_idx] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005444 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005445 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005446 }
5447
Jonathan Peyton30419822017-05-12 18:01:32 +00005448 // Reset pointer to parent team only for non-hot teams.
5449 team->t.t_parent = NULL;
5450 team->t.t_level = 0;
5451 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005452
Jonathan Peyton30419822017-05-12 18:01:32 +00005453 /* free the worker threads */
5454 for (f = 1; f < team->t.t_nproc; ++f) {
5455 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5456 __kmp_free_thread(team->t.t_threads[f]);
5457 team->t.t_threads[f] = NULL;
5458 }
5459
5460 /* put the team back in the team pool */
5461 /* TODO limit size of team pool, call reap_team if pool too large */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005462 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005463 __kmp_team_pool = (volatile kmp_team_t *)team;
5464 }
5465
5466 KMP_MB();
5467}
Jim Cownie5e8470a2013-09-27 10:38:44 +00005468
5469/* reap the team. destroy it, reclaim all its resources and free its memory */
Jonathan Peyton30419822017-05-12 18:01:32 +00005470kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5471 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005472
Jonathan Peyton30419822017-05-12 18:01:32 +00005473 KMP_DEBUG_ASSERT(team);
5474 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5475 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5476 KMP_DEBUG_ASSERT(team->t.t_threads);
5477 KMP_DEBUG_ASSERT(team->t.t_argv);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005478
Jonathan Peyton30419822017-05-12 18:01:32 +00005479 /* TODO clean the threads that are a part of this? */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005480
Jonathan Peyton30419822017-05-12 18:01:32 +00005481 /* free stuff */
5482 __kmp_free_team_arrays(team);
5483 if (team->t.t_argv != &team->t.t_inline_argv[0])
5484 __kmp_free((void *)team->t.t_argv);
5485 __kmp_free(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005486
Jonathan Peyton30419822017-05-12 18:01:32 +00005487 KMP_MB();
5488 return next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005489}
5490
Jim Cownie5e8470a2013-09-27 10:38:44 +00005491// Free the thread. Don't reap it, just place it on the pool of available
5492// threads.
5493//
5494// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5495// binding for the affinity mechanism to be useful.
5496//
5497// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5498// However, we want to avoid a potential performance problem by always
5499// scanning through the list to find the correct point at which to insert
5500// the thread (potential N**2 behavior). To do this we keep track of the
5501// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5502// With single-level parallelism, threads will always be added to the tail
5503// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5504// parallelism, all bets are off and we may need to scan through the entire
5505// free list.
5506//
5507// This change also has a potentially large performance benefit, for some
5508// applications. Previously, as threads were freed from the hot team, they
5509// would be placed back on the free list in inverse order. If the hot team
5510// grew back to it's original size, then the freed thread would be placed
5511// back on the hot team in reverse order. This could cause bad cache
5512// locality problems on programs where the size of the hot team regularly
5513// grew and shrunk.
5514//
5515// Now, for single-level parallelism, the OMP tid is always == gtid.
Jonathan Peyton30419822017-05-12 18:01:32 +00005516void __kmp_free_thread(kmp_info_t *this_th) {
5517 int gtid;
5518 kmp_info_t **scan;
Jonathan Peytonf4392462017-07-27 20:58:41 +00005519 kmp_root_t *root = this_th->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005520
Jonathan Peyton30419822017-05-12 18:01:32 +00005521 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5522 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005523
Jonathan Peyton30419822017-05-12 18:01:32 +00005524 KMP_DEBUG_ASSERT(this_th);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005525
Jonathan Peyton30419822017-05-12 18:01:32 +00005526 // When moving thread to pool, switch thread to wait on own b_go flag, and
5527 // uninitialized (NULL team).
5528 int b;
5529 kmp_balign_t *balign = this_th->th.th_bar;
5530 for (b = 0; b < bs_last_barrier; ++b) {
5531 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5532 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5533 balign[b].bb.team = NULL;
5534 balign[b].bb.leaf_kids = 0;
5535 }
5536 this_th->th.th_task_state = 0;
5537
5538 /* put thread back on the free pool */
5539 TCW_PTR(this_th->th.th_team, NULL);
5540 TCW_PTR(this_th->th.th_root, NULL);
5541 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5542
5543 // If the __kmp_thread_pool_insert_pt is already past the new insert
5544 // point, then we need to re-scan the entire list.
5545 gtid = this_th->th.th_info.ds.ds_gtid;
5546 if (__kmp_thread_pool_insert_pt != NULL) {
5547 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5548 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5549 __kmp_thread_pool_insert_pt = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005550 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005551 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005552
Jonathan Peyton30419822017-05-12 18:01:32 +00005553 // Scan down the list to find the place to insert the thread.
5554 // scan is the address of a link in the list, possibly the address of
5555 // __kmp_thread_pool itself.
5556 //
5557 // In the absence of nested parallelism, the for loop will have 0 iterations.
5558 if (__kmp_thread_pool_insert_pt != NULL) {
5559 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5560 } else {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005561 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005562 }
5563 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5564 scan = &((*scan)->th.th_next_pool))
5565 ;
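  // Illustration (hypothetical pool state): if the sorted pool holds threads
  // with gtids {2, 5, 9} and this thread's gtid is 7, the scan above stops at
  // the link pointing to gtid 9 and the splice below yields {2, 5, 7, 9}.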
Jim Cownie5e8470a2013-09-27 10:38:44 +00005566
Jonathan Peyton30419822017-05-12 18:01:32 +00005567 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5568 // to its address.
5569 TCW_PTR(this_th->th.th_next_pool, *scan);
5570 __kmp_thread_pool_insert_pt = *scan = this_th;
5571 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5572 (this_th->th.th_info.ds.ds_gtid <
5573 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5574 TCW_4(this_th->th.th_in_pool, TRUE);
5575 __kmp_thread_pool_nth++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005576
Jonathan Peyton30419822017-05-12 18:01:32 +00005577 TCW_4(__kmp_nth, __kmp_nth - 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00005578 root->r.r_cg_nthreads--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005579
5580#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005581 /* Adjust blocktime back to user setting or default if necessary */
5582 /* Middle initialization might never have occurred */
5583 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5584 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5585 if (__kmp_nth <= __kmp_avail_proc) {
5586 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005587 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005588 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005589#endif /* KMP_ADJUST_BLOCKTIME */
5590
Jonathan Peyton30419822017-05-12 18:01:32 +00005591 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005592}
5593
Jim Cownie5e8470a2013-09-27 10:38:44 +00005594/* ------------------------------------------------------------------------ */
5595
Jonathan Peyton30419822017-05-12 18:01:32 +00005596void *__kmp_launch_thread(kmp_info_t *this_thr) {
5597 int gtid = this_thr->th.th_info.ds.ds_gtid;
5598 /* void *stack_data;*/
5599 kmp_team_t *(*volatile pteam);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005600
Jonathan Peyton30419822017-05-12 18:01:32 +00005601 KMP_MB();
5602 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005603
Jonathan Peyton30419822017-05-12 18:01:32 +00005604 if (__kmp_env_consistency_check) {
5605 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5606 }
5607
5608#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005609 ompt_data_t *thread_data;
5610 if (ompt_enabled.enabled) {
5611 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5612 thread_data->ptr = NULL;
5613
5614 this_thr->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005615 this_thr->th.ompt_thread_info.wait_id = 0;
Joachim Protze82e94a52017-11-01 10:08:30 +00005616 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5617 if (ompt_enabled.ompt_callback_thread_begin) {
5618 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5619 ompt_thread_worker, thread_data);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005620 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005621 }
5622#endif
5623
Joachim Protze82e94a52017-11-01 10:08:30 +00005624#if OMPT_SUPPORT
5625 if (ompt_enabled.enabled) {
5626 this_thr->th.ompt_thread_info.state = omp_state_idle;
5627 }
5628#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005629 /* This is the place where threads wait for work */
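  // Sketch of the worker lifecycle implemented by the loop below: wait at the
  // fork barrier until a team is assigned, run the team's microtask through
  // t_invoke, then wait at the join barrier; repeat until library shutdown is
  // signalled via __kmp_global.g.g_done.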
5630 while (!TCR_4(__kmp_global.g.g_done)) {
5631 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5632 KMP_MB();
5633
5634 /* wait for work to do */
5635 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005636
Jonathan Peyton30419822017-05-12 18:01:32 +00005637 /* No tid yet since not part of a team */
5638 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5639
5640#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005641 if (ompt_enabled.enabled) {
5642 this_thr->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005643 }
5644#endif
5645
5646 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5647
5648 /* have we been allocated? */
5649 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005650 /* we were just woken up, so run our new task */
5651 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5652 int rc;
5653 KA_TRACE(20,
5654 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5655 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5656 (*pteam)->t.t_pkfn));
5657
5658 updateHWFPControl(*pteam);
5659
5660#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005661 if (ompt_enabled.enabled) {
5662 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00005663 }
5664#endif
5665
5666 {
5667 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5668 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5669 rc = (*pteam)->t.t_invoke(gtid);
5670 }
5671 KMP_ASSERT(rc);
5672
Jim Cownie5e8470a2013-09-27 10:38:44 +00005673 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00005674 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5675 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5676 (*pteam)->t.t_pkfn));
5677 }
Joachim Protze82e94a52017-11-01 10:08:30 +00005678#if OMPT_SUPPORT
5679 if (ompt_enabled.enabled) {
5680 /* no frame set while outside task */
5681 __ompt_get_task_info_object(0)->frame.exit_runtime_frame = NULL;
5682
5683 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5684 this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
Jonathan Peyton30419822017-05-12 18:01:32 +00005685 }
5686#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00005687 /* join barrier after parallel region */
5688 __kmp_join_barrier(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005689 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005690 }
5691 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005692
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005693#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005694 if (ompt_enabled.ompt_callback_thread_end) {
5695 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005696 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005697#endif
5698
Jonathan Peyton30419822017-05-12 18:01:32 +00005699 this_thr->th.th_task_team = NULL;
5700 /* run the destructors for the threadprivate data for this thread */
5701 __kmp_common_destroy_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005702
Jonathan Peyton30419822017-05-12 18:01:32 +00005703 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5704 KMP_MB();
5705 return this_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005706}
5707
5708/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005709
Jonathan Peyton30419822017-05-12 18:01:32 +00005710void __kmp_internal_end_dest(void *specific_gtid) {
5711#if KMP_COMPILER_ICC
5712#pragma warning(push)
5713#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5714// significant bits
5715#endif
5716 // Make sure no significant bits are lost
5717 int gtid = (kmp_intptr_t)specific_gtid - 1;
5718#if KMP_COMPILER_ICC
5719#pragma warning(pop)
5720#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005721
Jonathan Peyton30419822017-05-12 18:01:32 +00005722 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5723 /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage
5724 * this is because 0 is reserved for the nothing-stored case */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005725
Jonathan Peyton30419822017-05-12 18:01:32 +00005726 /* josh: One reason for setting the gtid specific data even when it is being
5727 destroyed by pthread is to allow gtid lookup through thread specific data
5728 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5729 that gets executed in the call to __kmp_internal_end_thread, actually
5730 gets the gtid through the thread specific data. Setting it here seems
5731 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5732 to run smoothly.
5733 todo: get rid of this after we remove the dependence on
5734 __kmp_gtid_get_specific */
5735 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5736 __kmp_gtid_set_specific(gtid);
5737#ifdef KMP_TDATA_GTID
5738 __kmp_gtid = gtid;
5739#endif
5740 __kmp_internal_end_thread(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005741}
5742
Jonathan Peyton99016992015-05-26 17:32:53 +00005743#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005744
Jonathan Peyton30419822017-05-12 18:01:32 +00005745// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases
5746// destructors work perfectly, but in real libomp.so I have no evidence it is
5747// ever called. However, -fini linker option in makefile.mk works fine.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005748
Jonathan Peyton30419822017-05-12 18:01:32 +00005749__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5750 __kmp_internal_end_atexit();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005751}
5752
Jonathan Peyton30419822017-05-12 18:01:32 +00005753void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005754
5755#endif
5756
Jonathan Peyton30419822017-05-12 18:01:32 +00005757/* [Windows] josh: when the atexit handler is called, there may still be more
5758 than one thread alive */
5759void __kmp_internal_end_atexit(void) {
5760 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5761 /* [Windows]
5762 josh: ideally, we want to completely shutdown the library in this atexit
5763 handler, but stat code that depends on thread specific data for gtid fails
5764 because that data becomes unavailable at some point during the shutdown, so
5765 we call __kmp_internal_end_thread instead. We should eventually remove the
5766 dependency on __kmp_get_specific_gtid in the stat code and use
5767 __kmp_internal_end_library to cleanly shutdown the library.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005768
Jonathan Peyton30419822017-05-12 18:01:32 +00005769 // TODO: Can some of this comment about GVS be removed?
5770 I suspect that the offending stat code is executed when the calling thread
5771 tries to clean up a dead root thread's data structures, resulting in GVS
5772 code trying to close the GVS structures for that thread, but since the stat
5773 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
5774 the calling thread is cleaning up itself instead of another thread, it gets
5775 confused. This happens because allowing a thread to unregister and clean up
5776 another thread is a recent modification for addressing an issue.
5777 Based on the current design (20050722), a thread may end up
5778 trying to unregister another thread only if thread death does not trigger
5779 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
5780 thread specific data destructor function to detect thread death. For
5781 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
5782 is nothing. Thus, the workaround is applicable only to the Windows static
5783 stat library. */
5784 __kmp_internal_end_library(-1);
5785#if KMP_OS_WINDOWS
5786 __kmp_close_console();
5787#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005788}
5789
Jonathan Peyton30419822017-05-12 18:01:32 +00005790static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5791 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005792
Jonathan Peyton30419822017-05-12 18:01:32 +00005793 int gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005794
Jonathan Peyton30419822017-05-12 18:01:32 +00005795 KMP_DEBUG_ASSERT(thread != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005796
Jonathan Peyton30419822017-05-12 18:01:32 +00005797 gtid = thread->th.th_info.ds.ds_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005798
Jonathan Peyton30419822017-05-12 18:01:32 +00005799 if (!is_root) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005800
Jonathan Peyton30419822017-05-12 18:01:32 +00005801 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5802 /* Assume the threads are at the fork barrier here */
5803 KA_TRACE(
5804 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5805 gtid));
5806 /* Need release fence here to prevent seg faults for tree forkjoin barrier
5807 * (GEH) */
5808 ANNOTATE_HAPPENS_BEFORE(thread);
5809 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5810 __kmp_release_64(&flag);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005811 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005812
Jonathan Peyton30419822017-05-12 18:01:32 +00005813 // Terminate OS thread.
5814 __kmp_reap_worker(thread);
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005815
Jonathan Peyton30419822017-05-12 18:01:32 +00005816 // The thread was killed asynchronously. If it was actively
5817 // spinning in the thread pool, decrement the global count.
5818 //
5819 // There is a small timing hole here - if the worker thread was just waking
5820 // up after sleeping in the pool, had reset its th_active_in_pool flag but
5821 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
5822 // the global counter might not get updated.
5823 //
5824 // Currently, this can only happen as the library is unloaded,
5825 // so there are no harmful side effects.
5826 if (thread->th.th_active_in_pool) {
5827 thread->th.th_active_in_pool = FALSE;
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00005828 KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
Jonathan Peyton30419822017-05-12 18:01:32 +00005829 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
5830 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005831
Jonathan Peyton30419822017-05-12 18:01:32 +00005832 // Decrement # of [worker] threads in the pool.
5833 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5834 --__kmp_thread_pool_nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005835 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005836
Jonathan Peyton30419822017-05-12 18:01:32 +00005837 __kmp_free_implicit_task(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005838
Jonathan Peyton30419822017-05-12 18:01:32 +00005839// Free the fast memory for tasking
5840#if USE_FAST_MEMORY
5841 __kmp_free_fast_memory(thread);
5842#endif /* USE_FAST_MEMORY */
5843
5844 __kmp_suspend_uninitialize_thread(thread);
5845
5846 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5847 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5848
5849 --__kmp_all_nth;
5850// __kmp_nth was decremented when the thread was added to the pool.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005851
5852#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005853 /* Adjust blocktime back to user setting or default if necessary */
5854 /* Middle initialization might never have occurred */
5855 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5856 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5857 if (__kmp_nth <= __kmp_avail_proc) {
5858 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005859 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005860 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005861#endif /* KMP_ADJUST_BLOCKTIME */
5862
Jonathan Peyton30419822017-05-12 18:01:32 +00005863 /* free the memory being used */
5864 if (__kmp_env_consistency_check) {
5865 if (thread->th.th_cons) {
5866 __kmp_free_cons_stack(thread->th.th_cons);
5867 thread->th.th_cons = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005868 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005869 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005870
Jonathan Peyton30419822017-05-12 18:01:32 +00005871 if (thread->th.th_pri_common != NULL) {
5872 __kmp_free(thread->th.th_pri_common);
5873 thread->th.th_pri_common = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005874 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005875
Jonathan Peyton30419822017-05-12 18:01:32 +00005876 if (thread->th.th_task_state_memo_stack != NULL) {
5877 __kmp_free(thread->th.th_task_state_memo_stack);
5878 thread->th.th_task_state_memo_stack = NULL;
5879 }
5880
5881#if KMP_USE_BGET
5882 if (thread->th.th_local.bget_data != NULL) {
5883 __kmp_finalize_bget(thread);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005884 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005885#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005886
Alp Toker98758b02014-03-02 04:12:06 +00005887#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005888 if (thread->th.th_affin_mask != NULL) {
5889 KMP_CPU_FREE(thread->th.th_affin_mask);
5890 thread->th.th_affin_mask = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005891 }
Alp Toker98758b02014-03-02 04:12:06 +00005892#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005893
Jonathan Peyton30419822017-05-12 18:01:32 +00005894 __kmp_reap_team(thread->th.th_serial_team);
5895 thread->th.th_serial_team = NULL;
5896 __kmp_free(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005897
Jonathan Peyton30419822017-05-12 18:01:32 +00005898 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005899
5900} // __kmp_reap_thread
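// [Editorial sketch, not part of the runtime] The KMP_ADJUST_BLOCKTIME blocks
// in __kmp_reap_thread above and __kmp_do_middle_initialize below reduce to a
// single predicate: blocktime is forced to zero only while the process
// oversubscribes the machine, and only if the user did not set KMP_BLOCKTIME
// explicitly. Hypothetical helper stating that assumption:
static bool example_should_force_zero_blocktime(int nth, int avail_proc,
                                                int user_set_blocktime) {
  return !user_set_blocktime && avail_proc > 0 && nth > avail_proc;
}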
5901
Jonathan Peyton30419822017-05-12 18:01:32 +00005902static void __kmp_internal_end(void) {
5903 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005904
Jonathan Peyton30419822017-05-12 18:01:32 +00005905 /* First, unregister the library */
5906 __kmp_unregister_library();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005907
Jonathan Peyton30419822017-05-12 18:01:32 +00005908#if KMP_OS_WINDOWS
5909 /* In Win static library, we can't tell when a root actually dies, so we
5910 reclaim the data structures for any root threads that have died but not
5911 unregistered themselves, in order to shut down cleanly.
5912 In Win dynamic library we also can't tell when a thread dies. */
5913 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
5914// dead roots
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005915#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005916
Jonathan Peyton30419822017-05-12 18:01:32 +00005917 for (i = 0; i < __kmp_threads_capacity; i++)
5918 if (__kmp_root[i])
5919 if (__kmp_root[i]->r.r_active)
5920 break;
5921 KMP_MB(); /* Flush all pending memory write invalidates. */
5922 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5923
5924 if (i < __kmp_threads_capacity) {
5925#if KMP_USE_MONITOR
5926 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5927 KMP_MB(); /* Flush all pending memory write invalidates. */
5928
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005929 // Need to check that monitor was initialized before reaping it. If we are
5930 // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
5931 // __kmp_monitor will appear to contain valid data, but it is only valid in
5932 // the parent process, not the child.
Jonathan Peyton30419822017-05-12 18:01:32 +00005933 // New behavior (201008): instead of keying off of the flag
5934 // __kmp_init_parallel, the monitor thread creation is keyed off
5935 // of the new flag __kmp_init_monitor.
5936 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5937 if (TCR_4(__kmp_init_monitor)) {
5938 __kmp_reap_monitor(&__kmp_monitor);
5939 TCW_4(__kmp_init_monitor, 0);
5940 }
5941 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5942 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5943#endif // KMP_USE_MONITOR
5944 } else {
5945/* TODO move this to cleanup code */
5946#ifdef KMP_DEBUG
5947 /* make sure that everything has properly ended */
5948 for (i = 0; i < __kmp_threads_capacity; i++) {
5949 if (__kmp_root[i]) {
5950 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
5951 // there can be uber threads alive here
5952 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
5953 }
5954 }
5955#endif
5956
5957 KMP_MB();
5958
5959 // Reap the worker threads.
5960 // This is valid for now, but be careful if threads are reaped sooner.
5961 while (__kmp_thread_pool != NULL) { // Loop through all the threads in the pool.
5962 // Get the next thread from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005963 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005964 __kmp_thread_pool = thread->th.th_next_pool;
5965 // Reap it.
5966 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5967 thread->th.th_next_pool = NULL;
5968 thread->th.th_in_pool = FALSE;
5969 __kmp_reap_thread(thread, 0);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005970 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005971 __kmp_thread_pool_insert_pt = NULL;
5972
5973 // Reap teams.
5974 while (__kmp_team_pool != NULL) { // Loop through all the teams in the pool.
5975 // Get the next team from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005976 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005977 __kmp_team_pool = team->t.t_next_pool;
5978 // Reap it.
5979 team->t.t_next_pool = NULL;
5980 __kmp_reap_team(team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005981 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005982
5983 __kmp_reap_task_teams();
5984
5985 for (i = 0; i < __kmp_threads_capacity; ++i) {
5986 // TBD: Add some checking...
5987 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5988 }
5989
5990 /* Make sure all threadprivate destructors get run by joining with all
5991 worker threads before resetting this flag */
5992 TCW_SYNC_4(__kmp_init_common, FALSE);
5993
5994 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
5995 KMP_MB();
5996
5997#if KMP_USE_MONITOR
5998 // See note above: One of the possible fixes for CQ138434 / CQ140126
5999 //
6000 // FIXME: push both code fragments down and CSE them?
6001 // push them into __kmp_cleanup() ?
6002 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6003 if (TCR_4(__kmp_init_monitor)) {
6004 __kmp_reap_monitor(&__kmp_monitor);
6005 TCW_4(__kmp_init_monitor, 0);
6006 }
6007 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6008 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6009#endif
6010 } /* else !__kmp_global.t_active */
6011 TCW_4(__kmp_init_gtid, FALSE);
6012 KMP_MB(); /* Flush all pending memory write invalidates. */
6013
6014 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006015#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006016 ompt_fini();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006017#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006018}
6019
Jonathan Peyton30419822017-05-12 18:01:32 +00006020void __kmp_internal_end_library(int gtid_req) {
6021 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6022 /* this shouldn't be a race condition because __kmp_internal_end() is the
6023 only place to clear __kmp_serial_init */
6024 /* we'll check this later too, after we get the lock */
6025 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6026 // redundant, because the next check will work in any case.
6027 if (__kmp_global.g.g_abort) {
6028 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6029 /* TODO abort? */
6030 return;
6031 }
6032 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6033 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6034 return;
6035 }
6036
6037 KMP_MB(); /* Flush all pending memory write invalidates. */
6038
6039 /* find out who we are and what we should do */
6040 {
6041 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6042 KA_TRACE(
6043 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6044 if (gtid == KMP_GTID_SHUTDOWN) {
6045 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6046 "already shutdown\n"));
6047 return;
6048 } else if (gtid == KMP_GTID_MONITOR) {
6049 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6050 "registered, or system shutdown\n"));
6051 return;
6052 } else if (gtid == KMP_GTID_DNE) {
6053 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6054 "shutdown\n"));
6055 /* we don't know who we are, but we may still shutdown the library */
6056 } else if (KMP_UBER_GTID(gtid)) {
6057 /* unregister ourselves as an uber thread. gtid is no longer valid */
6058 if (__kmp_root[gtid]->r.r_active) {
6059 __kmp_global.g.g_abort = -1;
6060 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6061 KA_TRACE(10,
6062 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6063 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006064 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006065 } else {
6066 KA_TRACE(
6067 10,
6068 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6069 __kmp_unregister_root_current_thread(gtid);
6070 }
6071 } else {
6072/* worker threads may call this function through the atexit handler, if they
6073 * call exit() */
6074/* For now, skip the usual subsequent processing and just dump the debug buffer.
6075 TODO: do a thorough shutdown instead */
6076#ifdef DUMP_DEBUG_ON_EXIT
6077 if (__kmp_debug_buf)
6078 __kmp_dump_debug_buffer();
6079#endif
6080 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006081 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006082 }
6083 /* synchronize the termination process */
6084 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006085
Jonathan Peyton30419822017-05-12 18:01:32 +00006086 /* have we already finished */
6087 if (__kmp_global.g.g_abort) {
6088 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6089 /* TODO abort? */
6090 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6091 return;
6092 }
6093 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6094 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6095 return;
6096 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006097
Jonathan Peyton30419822017-05-12 18:01:32 +00006098 /* We need this lock to enforce mutual exclusion between this reading of
6099 __kmp_threads_capacity and the writing by __kmp_register_root.
6100 Alternatively, we can use a counter of roots that is atomically updated by
6101 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6102 __kmp_internal_end_*. */
6103 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006104
Jonathan Peyton30419822017-05-12 18:01:32 +00006105 /* now we can safely conduct the actual termination */
6106 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006107
Jonathan Peyton30419822017-05-12 18:01:32 +00006108 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6109 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006110
Jonathan Peyton30419822017-05-12 18:01:32 +00006111 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006112
Jonathan Peyton30419822017-05-12 18:01:32 +00006113#ifdef DUMP_DEBUG_ON_EXIT
6114 if (__kmp_debug_buf)
6115 __kmp_dump_debug_buffer();
6116#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006117
Jonathan Peyton30419822017-05-12 18:01:32 +00006118#if KMP_OS_WINDOWS
6119 __kmp_close_console();
6120#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006121
Jonathan Peyton30419822017-05-12 18:01:32 +00006122 __kmp_fini_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006123
6124} // __kmp_internal_end_library
6125
Jonathan Peyton30419822017-05-12 18:01:32 +00006126void __kmp_internal_end_thread(int gtid_req) {
6127 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006128
Jonathan Peyton30419822017-05-12 18:01:32 +00006129 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6130 /* this shouldn't be a race condition because __kmp_internal_end() is the
6131 * only place to clear __kmp_serial_init */
6132 /* we'll check this later too, after we get the lock */
6133 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6134 // redundant, because the next check will work in any case.
6135 if (__kmp_global.g.g_abort) {
6136 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6137 /* TODO abort? */
6138 return;
6139 }
6140 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6141 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6142 return;
6143 }
6144
6145 KMP_MB(); /* Flush all pending memory write invalidates. */
6146
6147 /* find out who we are and what we should do */
6148 {
6149 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6150 KA_TRACE(10,
6151 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6152 if (gtid == KMP_GTID_SHUTDOWN) {
6153 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6154 "already shutdown\n"));
6155 return;
6156 } else if (gtid == KMP_GTID_MONITOR) {
6157 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6158 "registered, or system shutdown\n"));
6159 return;
6160 } else if (gtid == KMP_GTID_DNE) {
6161 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6162 "shutdown\n"));
6163 return;
6164 /* we don't know who we are */
6165 } else if (KMP_UBER_GTID(gtid)) {
6166 /* unregister ourselves as an uber thread. gtid is no longer valid */
6167 if (__kmp_root[gtid]->r.r_active) {
6168 __kmp_global.g.g_abort = -1;
6169 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6170 KA_TRACE(10,
6171 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6172 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006173 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006174 } else {
6175 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6176 gtid));
6177 __kmp_unregister_root_current_thread(gtid);
6178 }
6179 } else {
6180 /* just a worker thread, let's leave */
6181 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6182
6183 if (gtid >= 0) {
6184 __kmp_threads[gtid]->th.th_task_team = NULL;
6185 }
6186
6187 KA_TRACE(10,
6188 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6189 gtid));
6190 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006191 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006192 }
6193#if defined KMP_DYNAMIC_LIB
6194 // AC: let's not shut down the Linux* OS dynamic library at the exit of an
6195 // uber thread, because it is better to shut down later, in the library
6196 // destructor. The reason for this change is a performance problem when a
6197 // non-OpenMP thread in a loop forks and joins many OpenMP threads. We can
6198 // save a lot of time by keeping worker threads alive until program shutdown.
6199 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
6200 // and Windows(DPD200287443) that occurs when using critical sections from
6201 // foreign threads.
6202 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6203 return;
6204#endif
6205 /* synchronize the termination process */
6206 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006207
Jonathan Peyton30419822017-05-12 18:01:32 +00006208 /* have we already finished */
6209 if (__kmp_global.g.g_abort) {
6210 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6211 /* TODO abort? */
6212 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6213 return;
6214 }
6215 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6216 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6217 return;
6218 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006219
Jonathan Peyton30419822017-05-12 18:01:32 +00006220 /* We need this lock to enforce mutual exclusion between this reading of
6221 __kmp_threads_capacity and the writing by __kmp_register_root.
6222 Alternatively, we can use a counter of roots that is atomically updated by
6223 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6224 __kmp_internal_end_*. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006225
Jonathan Peyton30419822017-05-12 18:01:32 +00006226 /* should we finish the run-time? are all siblings done? */
6227 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006228
Jonathan Peyton30419822017-05-12 18:01:32 +00006229 for (i = 0; i < __kmp_threads_capacity; ++i) {
6230 if (KMP_UBER_GTID(i)) {
6231 KA_TRACE(
6232 10,
6233 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6234 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6235 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6236 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006237 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006238 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006239
Jonathan Peyton30419822017-05-12 18:01:32 +00006240 /* now we can safely conduct the actual termination */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006241
Jonathan Peyton30419822017-05-12 18:01:32 +00006242 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006243
Jonathan Peyton30419822017-05-12 18:01:32 +00006244 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6245 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006246
Jonathan Peyton30419822017-05-12 18:01:32 +00006247 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006248
Jonathan Peyton30419822017-05-12 18:01:32 +00006249#ifdef DUMP_DEBUG_ON_EXIT
6250 if (__kmp_debug_buf)
6251 __kmp_dump_debug_buffer();
6252#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006253} // __kmp_internal_end_thread
6254
Jonathan Peyton30419822017-05-12 18:01:32 +00006255// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006256// Library registration stuff.
6257
Jonathan Peyton30419822017-05-12 18:01:32 +00006258static long __kmp_registration_flag = 0;
6259// Random value used to indicate library initialization.
6260static char *__kmp_registration_str = NULL;
6261// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006262
Jonathan Peyton30419822017-05-12 18:01:32 +00006263static inline char *__kmp_reg_status_name() {
6264 /* On RHEL 3u5 if linked statically, getpid() returns different values in
6265 each thread. If registration and unregistration happen in different threads
6266 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6267 env var cannot be found, because the name will contain a different pid. */
6268 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00006269} // __kmp_reg_status_name
6270
Jonathan Peyton30419822017-05-12 18:01:32 +00006271void __kmp_register_library_startup(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006272
Jonathan Peyton30419822017-05-12 18:01:32 +00006273 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6274 int done = 0;
6275 union {
6276 double dtime;
6277 long ltime;
6278 } time;
6279#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6280 __kmp_initialize_system_tick();
6281#endif
6282 __kmp_read_system_time(&time.dtime);
6283 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6284 __kmp_registration_str =
6285 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6286 __kmp_registration_flag, KMP_LIBRARY_FILE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006287
Jonathan Peyton30419822017-05-12 18:01:32 +00006288 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6289 __kmp_registration_str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006290
Jonathan Peyton30419822017-05-12 18:01:32 +00006291 while (!done) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006292
Jonathan Peyton30419822017-05-12 18:01:32 +00006293 char *value = NULL; // Actual value of the environment variable.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006294
Jonathan Peyton30419822017-05-12 18:01:32 +00006295 // Set the environment variable, but do not overwrite it if it already exists.
6296 __kmp_env_set(name, __kmp_registration_str, 0);
6297 // Check that the variable was actually written.
6298 value = __kmp_env_get(name);
6299 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006300
Jonathan Peyton30419822017-05-12 18:01:32 +00006301 done = 1; // Ok, environment variable set successfully, exit the loop.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006302
Jonathan Peyton30419822017-05-12 18:01:32 +00006303 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006304
Jonathan Peyton30419822017-05-12 18:01:32 +00006305 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6306 // Check whether it is alive or dead.
6307 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6308 char *tail = value;
6309 char *flag_addr_str = NULL;
6310 char *flag_val_str = NULL;
6311 char const *file_name = NULL;
6312 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6313 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6314 file_name = tail;
6315 if (tail != NULL) {
6316 long *flag_addr = 0;
6317 long flag_val = 0;
6318 KMP_SSCANF(flag_addr_str, "%p", &flag_addr);
6319 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6320 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6321 // First, check whether environment-encoded address is mapped into
6322 // addr space.
6323 // If so, dereference it to see if it still has the right value.
6324 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6325 neighbor = 1;
6326 } else {
6327 // If not, then we know the other copy of the library is no longer
6328 // running.
6329 neighbor = 2;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006330 }
6331 }
6332 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006333 switch (neighbor) {
6334 case 0: // Cannot parse environment variable -- neighbor status unknown.
6335 // Assume it is the incompatible format of a future version of the
6336 // library. Assume the other library is alive.
6337 // WARN( ... ); // TODO: Issue a warning.
6338 file_name = "unknown library";
6339 // Attention! Falling through to the next case. That's intentional.
6340 case 1: { // Neighbor is alive.
6341 // Check it is allowed.
6342 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6343 if (!__kmp_str_match_true(duplicate_ok)) {
6344 // That's not allowed. Issue fatal error.
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006345 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6346 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006347 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006348 KMP_INTERNAL_FREE(duplicate_ok);
6349 __kmp_duplicate_library_ok = 1;
6350 done = 1; // Exit the loop.
6351 } break;
6352 case 2: { // Neighbor is dead.
6353 // Clear the variable and try to register library again.
6354 __kmp_env_unset(name);
6355 } break;
6356 default: { KMP_DEBUG_ASSERT(0); } break;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006357 }
6358 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006359 KMP_INTERNAL_FREE((void *)value);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006360 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006361 KMP_INTERNAL_FREE((void *)name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006362
6363} // func __kmp_register_library_startup
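// [Editorial sketch, not part of the runtime] The registration value written
// above has the layout "%p-%lx-%s" (flag address, flag value, library file).
// The hypothetical helper below shows how such a value could be decoded, e.g.
// in a debugging aid; the runtime itself uses __kmp_str_split() and
// KMP_SSCANF() as in the loop above.
static int example_parse_registration_value(char const *value,
                                            void **flag_addr, long *flag_val,
                                            char const **file_name) {
  char const *dash1 = strchr(value, '-');
  char const *dash2 = (dash1 != NULL) ? strchr(dash1 + 1, '-') : NULL;
  if (dash2 == NULL)
    return 0; // malformed value -- neighbor status unknown
  if (KMP_SSCANF(value, "%p", flag_addr) != 1 ||
      KMP_SSCANF(dash1 + 1, "%lx", flag_val) != 1)
    return 0;
  *file_name = dash2 + 1; // library file recorded by the other copy
  return 1;
}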
6364
Jonathan Peyton30419822017-05-12 18:01:32 +00006365void __kmp_unregister_library(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006366
Jonathan Peyton30419822017-05-12 18:01:32 +00006367 char *name = __kmp_reg_status_name();
6368 char *value = __kmp_env_get(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006369
Jonathan Peyton30419822017-05-12 18:01:32 +00006370 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6371 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6372 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6373 // Ok, this is our variable. Delete it.
6374 __kmp_env_unset(name);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006375 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006376
Jonathan Peyton30419822017-05-12 18:01:32 +00006377 KMP_INTERNAL_FREE(__kmp_registration_str);
6378 KMP_INTERNAL_FREE(value);
6379 KMP_INTERNAL_FREE(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006380
Jonathan Peyton30419822017-05-12 18:01:32 +00006381 __kmp_registration_flag = 0;
6382 __kmp_registration_str = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006383
6384} // __kmp_unregister_library
6385
Jim Cownie5e8470a2013-09-27 10:38:44 +00006386// End of Library registration stuff.
Jonathan Peyton30419822017-05-12 18:01:32 +00006387// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006388
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006389#if KMP_MIC_SUPPORTED
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006390
Jonathan Peyton30419822017-05-12 18:01:32 +00006391static void __kmp_check_mic_type() {
6392 kmp_cpuid_t cpuid_state = {0};
6393 kmp_cpuid_t *cs_p = &cpuid_state;
6394 __kmp_x86_cpuid(1, 0, cs_p);
6395 // We don't support mic1 at the moment
6396 if ((cs_p->eax & 0xff0) == 0xB10) {
6397 __kmp_mic_type = mic2;
6398 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6399 __kmp_mic_type = mic3;
6400 } else {
6401 __kmp_mic_type = non_mic;
6402 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006403}
6404
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006405#endif /* KMP_MIC_SUPPORTED */
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006406
Jonathan Peyton30419822017-05-12 18:01:32 +00006407static void __kmp_do_serial_initialize(void) {
6408 int i, gtid;
6409 int size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006410
Jonathan Peyton30419822017-05-12 18:01:32 +00006411 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006412
Jonathan Peyton30419822017-05-12 18:01:32 +00006413 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6414 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6415 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6416 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6417 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006418
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006419#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006420 ompt_pre_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006421#endif
6422
Jonathan Peyton30419822017-05-12 18:01:32 +00006423 __kmp_validate_locks();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006424
Jonathan Peyton30419822017-05-12 18:01:32 +00006425 /* Initialize internal memory allocator */
6426 __kmp_init_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006427
Jonathan Peyton30419822017-05-12 18:01:32 +00006428 /* Register the library startup via an environment variable and check to see
6429 whether another copy of the library is already registered. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006430
Jonathan Peyton30419822017-05-12 18:01:32 +00006431 __kmp_register_library_startup();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006432
Jonathan Peyton30419822017-05-12 18:01:32 +00006433 /* TODO reinitialization of library */
6434 if (TCR_4(__kmp_global.g.g_done)) {
6435 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6436 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006437
Jonathan Peyton30419822017-05-12 18:01:32 +00006438 __kmp_global.g.g_abort = 0;
6439 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006440
Jonathan Peyton30419822017-05-12 18:01:32 +00006441/* initialize the locks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006442#if KMP_USE_ADAPTIVE_LOCKS
6443#if KMP_DEBUG_ADAPTIVE_LOCKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006444 __kmp_init_speculative_stats();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006445#endif
6446#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006447#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006448 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006449#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006450 __kmp_init_lock(&__kmp_global_lock);
6451 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6452 __kmp_init_lock(&__kmp_debug_lock);
6453 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6454 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6455 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6456 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6457 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6458 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6459 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6460 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6461 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6462 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6463 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6464 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6465 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6466 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6467 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006468#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006469 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006470#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006471 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006472
Jonathan Peyton30419822017-05-12 18:01:32 +00006473 /* conduct initialization and initial setup of configuration */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006474
Jonathan Peyton30419822017-05-12 18:01:32 +00006475 __kmp_runtime_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006476
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006477#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006478 __kmp_check_mic_type();
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006479#endif
6480
Jonathan Peyton30419822017-05-12 18:01:32 +00006481// Some global variable initialization moved here from kmp_env_initialize()
Jim Cownie5e8470a2013-09-27 10:38:44 +00006482#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006483 kmp_diag = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006484#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006485 __kmp_abort_delay = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006486
Jonathan Peyton30419822017-05-12 18:01:32 +00006487 // From __kmp_init_dflt_team_nth()
6488 /* assume the entire machine will be used */
6489 __kmp_dflt_team_nth_ub = __kmp_xproc;
6490 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6491 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6492 }
6493 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6494 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6495 }
6496 __kmp_max_nth = __kmp_sys_max_nth;
Jonathan Peytonf4392462017-07-27 20:58:41 +00006497 __kmp_cg_max_nth = __kmp_sys_max_nth;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00006498 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
6499 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6500 __kmp_teams_max_nth = __kmp_sys_max_nth;
6501 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006502
Jonathan Peyton30419822017-05-12 18:01:32 +00006503 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
6504 // part
6505 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006506#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006507 __kmp_monitor_wakeups =
6508 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6509 __kmp_bt_intervals =
6510 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006511#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006512 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6513 __kmp_library = library_throughput;
6514 // From KMP_SCHEDULE initialization
6515 __kmp_static = kmp_sch_static_balanced;
6516// AC: do not use analytical here, because it is non-monotonic
6517//__kmp_guided = kmp_sch_guided_iterative_chunked;
6518//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
6519// need to repeat assignment
6520// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
6521// bit control and barrier method control parts
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006522#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton30419822017-05-12 18:01:32 +00006523#define kmp_reduction_barrier_gather_bb ((int)1)
6524#define kmp_reduction_barrier_release_bb ((int)1)
6525#define kmp_reduction_barrier_gather_pat bp_hyper_bar
6526#define kmp_reduction_barrier_release_pat bp_hyper_bar
6527#endif // KMP_FAST_REDUCTION_BARRIER
6528 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6529 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6530 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6531 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6532 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6533#if KMP_FAST_REDUCTION_BARRIER
6534 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
6535 // lin_64 ): hyper,1
6536 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6537 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6538 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6539 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006540 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006541#endif // KMP_FAST_REDUCTION_BARRIER
6542 }
6543#if KMP_FAST_REDUCTION_BARRIER
6544#undef kmp_reduction_barrier_release_pat
6545#undef kmp_reduction_barrier_gather_pat
6546#undef kmp_reduction_barrier_release_bb
6547#undef kmp_reduction_barrier_gather_bb
6548#endif // KMP_FAST_REDUCTION_BARRIER
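// [Editorial note] The branch-bits values configured above are log2 fan-outs
// for the tree/hyper barriers (the barrier code effectively uses
// 1 << branch_bits as the branching factor), so the reduction-barrier value of
// 1 above means two children per node, a value of 2 would mean four, and so on.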
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006549#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006550 if (__kmp_mic_type == mic2) { // KNC
6551 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
6552 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
6553 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6554 1; // forkjoin release
6555 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6556 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6557 }
6558#if KMP_FAST_REDUCTION_BARRIER
6559 if (__kmp_mic_type == mic2) { // KNC
6560 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6561 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6562 }
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006563#endif // KMP_FAST_REDUCTION_BARRIER
6564#endif // KMP_MIC_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006565
Jonathan Peyton30419822017-05-12 18:01:32 +00006566// From KMP_CHECKS initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00006567#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006568 __kmp_env_checks = TRUE; /* development versions have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006569#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006570 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006571#endif
6572
Jonathan Peyton30419822017-05-12 18:01:32 +00006573 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6574 __kmp_foreign_tp = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006575
Jonathan Peyton30419822017-05-12 18:01:32 +00006576 __kmp_global.g.g_dynamic = FALSE;
6577 __kmp_global.g.g_dynamic_mode = dynamic_default;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006578
Jonathan Peyton30419822017-05-12 18:01:32 +00006579 __kmp_env_initialize(NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006580
Jonathan Peyton30419822017-05-12 18:01:32 +00006581// Print all messages in message catalog for testing purposes.
6582#ifdef KMP_DEBUG
6583 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6584 if (__kmp_str_match_true(val)) {
6585 kmp_str_buf_t buffer;
6586 __kmp_str_buf_init(&buffer);
6587 __kmp_i18n_dump_catalog(&buffer);
6588 __kmp_printf("%s", buffer.str);
6589 __kmp_str_buf_free(&buffer);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006590 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006591 __kmp_env_free(&val);
6592#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006593
Jonathan Peyton30419822017-05-12 18:01:32 +00006594 __kmp_threads_capacity =
6595 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6596 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6597 __kmp_tp_capacity = __kmp_default_tp_capacity(
6598 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006599
Jonathan Peyton30419822017-05-12 18:01:32 +00006600 // If the library is shut down properly, these pools must be NULL. Just in
6601 // case, set them to NULL -- some memory may leak, but subsequent code will
6602 // work even if pools are not freed.
6603 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6604 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6605 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6606 __kmp_thread_pool = NULL;
6607 __kmp_thread_pool_insert_pt = NULL;
6608 __kmp_team_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006609
Jonathan Peyton30419822017-05-12 18:01:32 +00006610 /* Allocate all of the variable sized records */
6611 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
6612 * expandable */
6613 /* Since allocation is cache-aligned, just add extra padding at the end */
6614 size =
6615 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6616 CACHE_LINE;
6617 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6618 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6619 sizeof(kmp_info_t *) * __kmp_threads_capacity);
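// [Editorial note] Layout of the single cache-aligned block allocated above:
// __kmp_threads occupies the first sizeof(kmp_info_t *) * __kmp_threads_capacity
// bytes, __kmp_root starts immediately after it, and the extra CACHE_LINE bytes
// are trailing padding only:
//
//   [ __kmp_threads[0 .. cap-1] | __kmp_root[0 .. cap-1] | padding ]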
Jim Cownie5e8470a2013-09-27 10:38:44 +00006620
Jonathan Peyton30419822017-05-12 18:01:32 +00006621 /* init thread counts */
6622 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6623 0); // Asserts fail if the library is reinitializing and
6624 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
6625 __kmp_all_nth = 0;
6626 __kmp_nth = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006627
Jonathan Peyton30419822017-05-12 18:01:32 +00006628 /* setup the uber master thread and hierarchy */
6629 gtid = __kmp_register_root(TRUE);
6630 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6631 KMP_ASSERT(KMP_UBER_GTID(gtid));
6632 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006633
Jonathan Peyton30419822017-05-12 18:01:32 +00006634 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006635
Jonathan Peyton30419822017-05-12 18:01:32 +00006636 __kmp_common_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006637
Jonathan Peyton30419822017-05-12 18:01:32 +00006638#if KMP_OS_UNIX
6639 /* invoke the child fork handler */
6640 __kmp_register_atfork();
6641#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006642
Jonathan Peyton30419822017-05-12 18:01:32 +00006643#if !defined KMP_DYNAMIC_LIB
6644 {
6645 /* Invoke the exit handler when the program finishes, only for static
6646 library. For dynamic library, we already have _fini and DllMain. */
6647 int rc = atexit(__kmp_internal_end_atexit);
6648 if (rc != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006649 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6650 __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006651 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006652 }
6653#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006654
Jonathan Peyton30419822017-05-12 18:01:32 +00006655#if KMP_HANDLE_SIGNALS
6656#if KMP_OS_UNIX
6657 /* NOTE: make sure that this is called before the user installs their own
6658 signal handlers so that the user handlers are called first. this way they
6659 can return false, not call our handler, avoid terminating the library, and
6660 continue execution where they left off. */
6661 __kmp_install_signals(FALSE);
6662#endif /* KMP_OS_UNIX */
6663#if KMP_OS_WINDOWS
6664 __kmp_install_signals(TRUE);
6665#endif /* KMP_OS_WINDOWS */
6666#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006667
Jonathan Peyton30419822017-05-12 18:01:32 +00006668 /* we have finished the serial initialization */
6669 __kmp_init_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006670
Jonathan Peyton30419822017-05-12 18:01:32 +00006671 __kmp_init_serial = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006672
Jonathan Peyton30419822017-05-12 18:01:32 +00006673 if (__kmp_settings) {
6674 __kmp_env_print();
6675 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006676
6677#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006678 if (__kmp_display_env || __kmp_display_env_verbose) {
6679 __kmp_env_print_2();
6680 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006681#endif // OMP_40_ENABLED
6682
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006683#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006684 ompt_post_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006685#endif
6686
Jonathan Peyton30419822017-05-12 18:01:32 +00006687 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006688
Jonathan Peyton30419822017-05-12 18:01:32 +00006689 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006690}
6691
Jonathan Peyton30419822017-05-12 18:01:32 +00006692void __kmp_serial_initialize(void) {
6693 if (__kmp_init_serial) {
6694 return;
6695 }
6696 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6697 if (__kmp_init_serial) {
6698 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6699 return;
6700 }
6701 __kmp_do_serial_initialize();
6702 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6703}
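// [Editorial sketch, not part of the runtime] __kmp_serial_initialize above and
// __kmp_middle_initialize / __kmp_parallel_initialize below all follow the same
// double-checked pattern; a generic restatement with hypothetical names (in the
// real functions the do_init step is what sets the flag):
static void example_checked_init(volatile int *init_flag,
                                 void (*do_init)(void)) {
  if (*init_flag) // fast path: already initialized, no lock taken
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (!*init_flag) // re-check under the lock so only one thread initializes
    do_init();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}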
6704
6705static void __kmp_do_middle_initialize(void) {
6706 int i, j;
6707 int prev_dflt_team_nth;
6708
6709 if (!__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006710 __kmp_do_serial_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006711 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006712
Jonathan Peyton30419822017-05-12 18:01:32 +00006713 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006714
Jonathan Peyton30419822017-05-12 18:01:32 +00006715 // Save the previous value for the __kmp_dflt_team_nth so that
6716 // we can avoid some reinitialization if it hasn't changed.
6717 prev_dflt_team_nth = __kmp_dflt_team_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006718
Alp Toker98758b02014-03-02 04:12:06 +00006719#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006720 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6721 // number of cores on the machine.
6722 __kmp_affinity_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006723
Jonathan Peyton30419822017-05-12 18:01:32 +00006724 // Run through the __kmp_threads array and set the affinity mask
6725 // for each root thread that is currently registered with the RTL.
6726 for (i = 0; i < __kmp_threads_capacity; i++) {
6727 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6728 __kmp_affinity_set_init_mask(i, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006729 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006730 }
Alp Toker98758b02014-03-02 04:12:06 +00006731#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006732
Jonathan Peyton30419822017-05-12 18:01:32 +00006733 KMP_ASSERT(__kmp_xproc > 0);
6734 if (__kmp_avail_proc == 0) {
6735 __kmp_avail_proc = __kmp_xproc;
6736 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006737
Jonathan Peyton30419822017-05-12 18:01:32 +00006738 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
6739 // correct them now
6740 j = 0;
6741 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6742 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6743 __kmp_avail_proc;
6744 j++;
6745 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006746
Jonathan Peyton30419822017-05-12 18:01:32 +00006747 if (__kmp_dflt_team_nth == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006748#ifdef KMP_DFLT_NTH_CORES
Jonathan Peyton30419822017-05-12 18:01:32 +00006749 // Default #threads = #cores
6750 __kmp_dflt_team_nth = __kmp_ncores;
6751 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6752 "__kmp_ncores (%d)\n",
6753 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006754#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006755 // Default #threads = #available OS procs
6756 __kmp_dflt_team_nth = __kmp_avail_proc;
6757 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6758 "__kmp_avail_proc(%d)\n",
6759 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006760#endif /* KMP_DFLT_NTH_CORES */
Jonathan Peyton30419822017-05-12 18:01:32 +00006761 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006762
Jonathan Peyton30419822017-05-12 18:01:32 +00006763 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6764 __kmp_dflt_team_nth = KMP_MIN_NTH;
6765 }
6766 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6767 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6768 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006769
Jonathan Peyton30419822017-05-12 18:01:32 +00006770 // There's no harm in continuing if the following check fails,
6771 // but it indicates an error in the previous logic.
6772 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006773
Jonathan Peyton30419822017-05-12 18:01:32 +00006774 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6775 // Run through the __kmp_threads array and set the num threads icv for each
6776 // root thread that is currently registered with the RTL (which has not
6777 // already explicitly set its nthreads-var with a call to
6778 // omp_set_num_threads()).
6779 for (i = 0; i < __kmp_threads_capacity; i++) {
6780 kmp_info_t *thread = __kmp_threads[i];
6781 if (thread == NULL)
6782 continue;
6783 if (thread->th.th_current_task->td_icvs.nproc != 0)
6784 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006785
Jonathan Peyton30419822017-05-12 18:01:32 +00006786 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006787 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006788 }
6789 KA_TRACE(
6790 20,
6791 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6792 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006793
6794#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00006795 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
6796 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6797 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6798 if (__kmp_nth > __kmp_avail_proc) {
6799 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006800 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006801 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006802#endif /* KMP_ADJUST_BLOCKTIME */
6803
Jonathan Peyton30419822017-05-12 18:01:32 +00006804 /* we have finished middle initialization */
6805 TCW_SYNC_4(__kmp_init_middle, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006806
Jonathan Peyton30419822017-05-12 18:01:32 +00006807 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006808}
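// [Editorial sketch, not part of the runtime] Worked example of the empty-slot
// correction in __kmp_do_middle_initialize above: with OMP_NUM_THREADS=",,2,3"
// and __kmp_avail_proc == 8, the parsed list {0, 0, 2, 3} becomes {8, 8, 2, 3}
// (and the default team size becomes 8). Hypothetical mirror of that loop:
static void example_fill_leading_empty_nth(int *nth, int used, int avail_proc) {
  int j = 0;
  while (j < used && !nth[j]) { // only the leading empty places are rewritten
    nth[j] = avail_proc;
    j++;
  }
}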
6809
Jonathan Peyton30419822017-05-12 18:01:32 +00006810void __kmp_middle_initialize(void) {
6811 if (__kmp_init_middle) {
6812 return;
6813 }
6814 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6815 if (__kmp_init_middle) {
6816 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6817 return;
6818 }
6819 __kmp_do_middle_initialize();
6820 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6821}
6822
6823void __kmp_parallel_initialize(void) {
6824 int gtid = __kmp_entry_gtid(); // this might be a new root
6825
6826 /* synchronize parallel initialization (for sibling) */
6827 if (TCR_4(__kmp_init_parallel))
6828 return;
6829 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6830 if (TCR_4(__kmp_init_parallel)) {
6831 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6832 return;
6833 }
6834
6835 /* TODO reinitialization after we have already shut down */
6836 if (TCR_4(__kmp_global.g.g_done)) {
6837 KA_TRACE(
6838 10,
6839 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
6840 __kmp_infinite_loop();
6841 }
6842
6843 /* jc: The lock __kmp_initz_lock is already held, so calling
6844 __kmp_serial_initialize would cause a deadlock. So we call
6845 __kmp_do_serial_initialize directly. */
6846 if (!__kmp_init_middle) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006847 __kmp_do_middle_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006848 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006849
Jonathan Peyton30419822017-05-12 18:01:32 +00006850 /* begin initialization */
6851 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
6852 KMP_ASSERT(KMP_UBER_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006853
6854#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00006855 // Save the FP control regs.
6856 // Worker threads will set theirs to these values at thread startup.
6857 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6858 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6859 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006860#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6861
6862#if KMP_OS_UNIX
Jonathan Peyton30419822017-05-12 18:01:32 +00006863#if KMP_HANDLE_SIGNALS
6864 /* must be after __kmp_serial_initialize */
6865 __kmp_install_signals(TRUE);
6866#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006867#endif
6868
Jonathan Peyton30419822017-05-12 18:01:32 +00006869 __kmp_suspend_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006870
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006871#if defined(USE_LOAD_BALANCE)
Jonathan Peyton30419822017-05-12 18:01:32 +00006872 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6873 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6874 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006875#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006876 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6877 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6878 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006879#endif
6880
Jonathan Peyton30419822017-05-12 18:01:32 +00006881 if (__kmp_version) {
6882 __kmp_print_version_2();
6883 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006884
Jonathan Peyton30419822017-05-12 18:01:32 +00006885 /* we have finished parallel initialization */
6886 TCW_SYNC_4(__kmp_init_parallel, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006887
Jonathan Peyton30419822017-05-12 18:01:32 +00006888 KMP_MB();
6889 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006890
Jonathan Peyton30419822017-05-12 18:01:32 +00006891 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006892}

/* ------------------------------------------------------------------------ */

void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
  // this_thr->th.th_info.ds.ds_tid ] );

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
#if OMP_45_ENABLED
  dispatch->th_doacross_buf_idx =
      0; /* reset the doacross dispatch buffer counter */
#endif
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}

int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_callee_enter(
        (__itt_caller)
            team->t.t_stack_id); // inform ittnotify about entering user's code
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_runtime_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
                           .ompt_task_info.frame.exit_runtime_frame);
  } else {
    exit_runtime_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid));
  }
#endif

  {
    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
    rc =
        __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                               tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_runtime_p
#endif
                               );
#if OMPT_SUPPORT
    *exit_runtime_p = NULL;
#endif
  }

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_callee_leave(
        (__itt_caller)
            team->t.t_stack_id); // inform ittnotify about leaving user's code
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}

#if OMP_40_ENABLED
void __kmp_teams_master(int gtid) {
  // This routine is called by all master threads in the teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
// Launch the league of teams now, but do not let workers execute
// (they hang on the fork barrier until the next parallel region)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif

  // AC: last parameter "1" eliminates the join barrier, which won't work
  // because worker threads are in a fork barrier waiting for more parallel
  // regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}

int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
  __kmp_teams_master(gtid);
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
#endif /* OMP_40_ENABLED */
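
// Illustrative sketch (not part of the runtime): user code that reaches
// __kmp_teams_master / __kmp_invoke_teams_master above. Each initial thread of
// the league runs the teams region as the "wrapped" microtask. A minimal
// standalone OpenMP program of that shape, assuming a compiler/OpenMP level
// that accepts a host teams construct (not compiled here):
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  // Request 4 teams; inner parallel regions are limited to 2 threads each.
  #pragma omp teams num_teams(4) thread_limit(2)
  {
    #pragma omp parallel
    printf("team %d of %d, thread %d\n", omp_get_team_num(),
           omp_get_num_teams(), omp_get_thread_num());
  }
  return 0;
}
#endif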

/* this sets the requested number of threads for the next parallel region
   encountered by this team. Since this should be enclosed in the forkjoin
   critical section, it should avoid race conditions with asymmetrical nested
   parallelism. */

void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
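
// Illustrative sketch (not part of the runtime): __kmp_push_num_threads backs
// the num_threads() clause; compilers typically emit a call through the
// __kmpc_push_num_threads entry point just before forking. A standalone user
// program of that shape (not compiled here):
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  // The clause value is consumed by exactly one following parallel region.
  #pragma omp parallel num_threads(3)
  printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
  return 0;
}
#endif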

#if OMP_40_ENABLED

/* this sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams == 0)
    num_teams = 1; // default number of teams is 1.
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set the number of teams (i.e. the number of threads in the outer
  // "parallel" of the teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  // Remember the number of threads for inner parallel regions
  if (num_threads == 0) {
    if (!TCR_4(__kmp_init_middle))
      __kmp_middle_initialize(); // get __kmp_avail_proc calculated
    num_threads = __kmp_avail_proc / num_teams;
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      // adjust num_threads without a warning since it is not a user setting
      num_threads = __kmp_teams_max_nth / num_teams;
    }
  } else {
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (!__kmp_reserve_warn) { // user asked for too many threads
        __kmp_reserve_warn = 1; // that conflicts with KMP_TEAMS_THREAD_LIMIT
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
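
// Illustrative sketch (not part of the runtime): the sizing logic above clamps
// num_teams and num_threads so that num_teams * num_threads never exceeds
// __kmp_teams_max_nth. A standalone re-statement of that arithmetic with
// hypothetical names (avail_proc and teams_max_nth stand in for the globals):
#if 0
struct example_teams_size {
  int nteams;
  int nth;
};

static example_teams_size example_clamp_teams(int num_teams, int num_threads,
                                              int avail_proc,
                                              int teams_max_nth) {
  if (num_teams == 0)
    num_teams = 1; // default is a single team
  if (num_teams > teams_max_nth)
    num_teams = teams_max_nth; // too many teams requested
  if (num_threads == 0)
    num_threads = avail_proc / num_teams; // spread available procs over teams
  if (num_teams * num_threads > teams_max_nth)
    num_threads = teams_max_nth / num_teams; // keep the product bounded
  return {num_teams, num_threads};
}
#endif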

// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}

#endif /* OMP_40_ENABLED */
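
// Illustrative sketch (not part of the runtime): __kmp_push_proc_bind backs
// the proc_bind() clause for the next parallel region. A standalone user
// program of that shape (not compiled here):
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  // Ask for the threads of this one region to be placed close to the master.
  #pragma omp parallel proc_bind(close) num_threads(4)
  printf("thread %d\n", omp_get_thread_num());
  return 0;
}
#endif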

/* Launch the worker threads into the microtask. */

void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
#endif
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
    ompt_data_t *tId = OMPT_CUR_TASK_DATA(this_thr);
    ompt_data_t *pId = OMPT_CUR_TEAM_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, tId, 0, ds_tid);
    }
    // return to idle state
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}

/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the number of worker threads actively spinning in the hot team, if
// we are at the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count master thread
  }

  // Skip the master thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this particular root (if we are at the outer par level), and the currently
  // executing thread (to become the master) are available to add to the new
  // team, but are currently contributing to the system load, and must be
  // accounted for.
  pool_active = TCR_4(__kmp_thread_pool_active_nth);
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
    // = dynamic_thread_limit, we shouldn't wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the number of active OpenMP threads that are available to add to
  // the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
} // __kmp_load_balance_nproc()

#endif /* USE_LOAD_BALANCE */
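
// Illustrative sketch (not part of the runtime): the dynamic_load_balance mode
// above sizes the next team as "available procs minus what the rest of the
// system is using, plus the threads we already own", clamped to the request
// and to the runtime minimum. A standalone re-statement of that arithmetic
// with hypothetical names (min_nth stands in for KMP_MIN_NTH):
#if 0
static int example_load_balance_nproc(int avail_proc, int system_active,
                                      int team_curr_active, int set_nproc,
                                      int min_nth) {
  // Our own pool/hot-team/master threads show up in the measured system load,
  // so never let it drop below what we contribute ourselves.
  if (system_active < team_curr_active)
    system_active = team_curr_active;
  int retval = avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) // never exceed what the region asked for
    retval = set_nproc;
  if (retval < min_nth) // and never go below the runtime minimum
    retval = min_nth;
  return retval;
}
#endif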

/* ------------------------------------------------------------------------ */

/* NOTE: this is called with the __kmp_init_lock held */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;

  __kmp_i18n_catclose();

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}

/* ------------------------------------------------------------------------ */

int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is a no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is a no-op.
  return TRUE;
}

void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}

/* ------------------------------------------------------------------------ */

void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */

  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
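
// Illustrative sketch (not part of the runtime): the serial/turnaround/
// throughput modes above are normally chosen through the KMP_LIBRARY
// environment variable or the kmp_set_library* extension entry points. A
// standalone sketch of typical usage; the prototype below is spelled from the
// documented extension and is an assumption here, not taken from a header:
#if 0
#include <omp.h>
#include <stdio.h>

extern "C" void kmp_set_library_throughput(void); // assumed extension API

int main(void) {
  // Roughly equivalent to running with KMP_LIBRARY=throughput: workers spin
  // briefly after a region and then sleep, which favors shared machines.
  kmp_set_library_throughput();
  #pragma omp parallel
  printf("thread %d\n", omp_get_thread_num());
  return 0;
}
#endif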

void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
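
// Illustrative sketch (not part of the runtime): the Darwin branch above
// rounds a requested stack size up to a 4 KiB (0x1000) boundary before the
// value is clamped. A standalone re-statement of that rounding, with a worked
// example in the comments:
#if 0
#include <stddef.h>

static size_t example_round_up_to_page(size_t bytes) {
  const size_t page = 0x1000; // 4 KiB, as in the code above
  if (bytes & (page - 1)) { // not already a multiple of the page size
    bytes &= ~(page - 1); // drop the low bits: 0x12345 -> 0x12000
    if (bytes + page) // guard against wrap-around at the top of size_t
      bytes += page; // ... then round up: 0x12000 -> 0x13000
  }
  return bytes; // e.g. 0x12345 -> 0x13000, while 0x4000 stays 0x4000
}
#endif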

/* set the behaviour of the runtime library */
/* TODO this can cause some odd behaviour with sibling parallelism... */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
    (void)__kmp_change_library(TRUE);
  } break;
  case library_turnaround:
    (void)__kmp_change_library(TRUE);
    break;
  case library_throughput:
    (void)__kmp_change_library(FALSE);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* ------------------------------------------------------------------------ */

void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
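
// Illustrative sketch (not part of the runtime): blocktime is usually set via
// the KMP_BLOCKTIME environment variable or the kmp_set_blocktime() extension,
// both of which funnel into __kmp_aux_set_blocktime above. A standalone sketch;
// the prototype is spelled from the documented extension and is an assumption
// here, not taken from a header:
#if 0
#include <omp.h>
#include <stdio.h>

extern "C" void kmp_set_blocktime(int msec); // assumed extension API

int main(void) {
  // Workers spin for ~0 ms after finishing a region before sleeping; useful
  // when parallel regions are far apart and cores should be released quickly.
  kmp_set_blocktime(0);
  #pragma omp parallel
  printf("thread %d\n", omp_get_thread_num());
  return 0;
}
#endif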

void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings
#if OMP_40_ENABLED
      || __kmp_display_env || __kmp_display_env_verbose
#endif // OMP_40_ENABLED
      ) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults

/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct (lck != NULL, like in current
  // PAROPT).
  // If (reduce_data != NULL && reduce_func != NULL): the tree-reduction method
  // can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by the RTL.
  // Finally, it's up to the OpenMP RTL to decide which method to select among
  // those generated by PAROPT.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic dereference) is
  // slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||       \
    KMP_OS_DARWIN

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
// KMP_OS_DARWIN

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_WINDOWS

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
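
// Illustrative sketch (not part of the runtime): the selection above is driven
// by what the compiler emitted for a reduction clause (atomic and/or tree
// paths), by the team size, and optionally by the KMP_FORCE_REDUCTION
// environment variable. A standalone user program whose reduction is subject
// to this choice (not compiled here):
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  double sum = 0.0;
  // Run with e.g. KMP_FORCE_REDUCTION=atomic to override the heuristic above.
  #pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < 1000; ++i)
    sum += i * 0.5;
  printf("sum = %f\n", sum);
  return 0;
}
#endif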

// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}