/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
    "5.0 (201611)";
#elif OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
     non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
     may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}

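// The internal algorithm above identifies the calling thread by checking
// whether the address of one of its locals falls inside a registered
// thread's stack. A minimal standalone sketch of that containment test
// (illustrative only, kept out of the build; the helper name is not a
// runtime API):
#if 0
// Stacks grow down, so a thread owns [stack_base - stack_size, stack_base].
static int __kmp_example_addr_on_stack(char *addr, char *stack_base,
                                       size_t stack_size) {
  if (addr > stack_base)
    return 0; // above the base, cannot belong to this stack
  return (size_t)(stack_base - addr) <= stack_size;
}
#endif
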
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}

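// The extensive check above flags a collision when either endpoint of this
// thread's stack lies strictly inside another registered thread's stack
// range. A minimal standalone sketch of that interval test (illustrative
// only, kept out of the build; the helper name is not a runtime API):
#if 0
static int __kmp_example_stacks_overlap(char *beg, char *end, char *other_beg,
                                        char *other_end) {
  return (beg > other_beg && beg < other_end) ||
         (end > other_beg && end < other_end);
}
#endif
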
/* ------------------------------------------------------------------------ */

void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}

#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock(" GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}

void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;

    /* On Windows* OS by default abort() causes pop-up error box, which stalls
       nightly testing. Unfortunately, we cannot reliably suppress pop-up error
       boxes. _set_abort_behavior() works well, but this function is not
       available in VS7 (this is not problem for DLL, but it is a problem for
       static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
       help, at least in some versions of MS C RTL.

       It seems following sequence is the only way to simulate abort() and
       avoid pop-up error box. */
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). OS terminates other threads (except the one
  // calling ProcessExit or FreeLibrary). So, it might be safe to access the
  // __kmp_threads[] without taking the forkjoin_lock. However, in fact, some
  // threads can be still alive here, although being about to be terminated. The
  // threads in the array with ds_thread==0 are most suspicious. Actually, it
  // can be not safe to access the __kmp_threads[].

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      // lpReserved == NULL when FreeLibrary() was called,
      // lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear triggering the problem of unreleased forkjoin
      // lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like it's not a corner
      // case, but common cases:
      // - the main function was compiled by an alternative compiler;
      // - the main function was compiled by icl but without /Qopenmp
      //   (application with plugins);
      // - application terminates by calling C exit(), Fortran CALL EXIT() or
      //   Fortran STOP.
      // - alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // return previous setting of whether
  // KMP_LIBRARY=throughput
}

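// __kmp_change_library encodes the KMP_LIBRARY mode in the low bit of a
// counter: an even value means throughput, an odd value means turnaround.
// A minimal standalone sketch of the same bit manipulation (illustrative
// only, kept out of the build; the helper name is not a runtime API):
#if 0
static int __kmp_example_change_mode(unsigned *counter, int turnaround) {
  int old_turnaround = (int)(*counter & 1); // low bit holds the current mode
  if (turnaround)
    *counter |= 1u; // make the count odd: turnaround
  else
    *counter &= ~1u; // make the count even: throughput
  return old_turnaround;
}
#endif
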
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
      /* accept blame for "ordered" waiting */
      kmp_info_t *this_thread = __kmp_threads[gtid];
      ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
          this_thread->th.ompt_thread_info.wait_id);
    }
#endif

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

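// __kmp_parallel_deo/__kmp_parallel_dxo implement a simple turn-taking
// protocol for "ordered": each thread waits until the shared counter equals
// its own tid, and on exit hands the turn to (tid + 1) % nproc. A minimal
// standalone sketch of that hand-off (illustrative only, kept out of the
// build; it uses C++ atomics instead of the runtime's TCR/TCW and wait
// machinery):
#if 0
#include <atomic>
static void __kmp_example_ordered_enter(std::atomic<int> *turn, int tid) {
  while (turn->load(std::memory_order_acquire) != tid) {
    // spin; the runtime yields or suspends instead of busy-waiting
  }
}
static void __kmp_example_ordered_exit(std::atomic<int> *turn, int tid,
                                       int nproc) {
  turn->store((tid + 1) % nproc, std::memory_order_release);
}
#endif
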
/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                           th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}

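// __kmp_enter_single decides the winner of a SINGLE with a compare-and-swap
// race on the team's construct counter: every thread bumps its private
// counter, and the one that manages to advance the shared counter executes
// the single block. A minimal standalone sketch of that race (illustrative
// only, kept out of the build; the helper name is not a runtime API):
#if 0
#include <atomic>
static int __kmp_example_enter_single(std::atomic<kmp_int32> *team_count,
                                      kmp_int32 *my_count) {
  kmp_int32 old_this = *my_count; // private counts stay in lock-step
  ++(*my_count);
  // Only the first thread to arrive sees team_count == old_this and wins.
  return team_count->compare_exchange_strong(old_this, *my_count);
}
#endif
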
/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads
#if OMP_40_ENABLED
                                 ,
                                 int enter_teams
#endif /* OMP_40_ENABLED */
                                 ) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  if (root->r.r_cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_cg_max_nth) {
    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}

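// Each limit check above uses the same arithmetic: the threads this fork
// would add are new_nthreads minus the master (or hot team) already counted,
// and the headroom under a limit is the limit minus what is already in use.
// A minimal standalone sketch of that clamp (illustrative only, kept out of
// the build; the helper name is not a runtime API):
#if 0
static int __kmp_example_clamp_to_limit(int new_nthreads, int already_counted,
                                        int in_use, int limit) {
  if (in_use + new_nthreads - already_counted > limit) {
    int headroom = limit - in_use + already_counted;
    new_nthreads = (headroom <= 0) ? 1 : headroom;
  }
  return new_nthreads;
}
#endif
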
/* Allocate threads from the thread pool and assign them to the new team. We are
   assured that there are enough threads available, because we checked on that
   earlier within critical section forkjoin */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team
// structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team
    // during the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

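// propagateFPControl/updateHWFPControl above follow a save/compare/restore
// pattern so that workers inherit the master's x87 and MXCSR settings while
// avoiding redundant register writes. A minimal standalone sketch of the same
// idea for the MXCSR register only, using the SSE intrinsics from
// <xmmintrin.h> (illustrative only, kept out of the build; assumes an
// SSE-capable target):
#if 0
#include <xmmintrin.h>
static unsigned int __kmp_example_saved_mxcsr;
static void __kmp_example_save_fp_control(void) {
  __kmp_example_saved_mxcsr = _mm_getcsr();
}
static void __kmp_example_restore_fp_control(void) {
  if (_mm_getcsr() != __kmp_example_saved_mxcsr) // skip a redundant write
    _mm_setcsr(__kmp_example_saved_mxcsr);
}
#endif
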
Jonathan Peyton30419822017-05-12 18:01:32 +00001156static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1157 int realloc); // forward declaration
Jim Cownie5e8470a2013-09-27 10:38:44 +00001158
Jonathan Peyton30419822017-05-12 18:01:32 +00001159/* Run a parallel region that has been serialized, so runs only in a team of the
1160 single master thread. */
1161void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1162 kmp_info_t *this_thr;
1163 kmp_team_t *serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001164
Jonathan Peyton30419822017-05-12 18:01:32 +00001165 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001166
Jonathan Peyton30419822017-05-12 18:01:32 +00001167 /* Skip all this code for autopar serialized loops since it results in
1168 unacceptable overhead */
1169 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1170 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001171
Jonathan Peyton30419822017-05-12 18:01:32 +00001172 if (!TCR_4(__kmp_init_parallel))
1173 __kmp_parallel_initialize();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001174
Jonathan Peyton30419822017-05-12 18:01:32 +00001175 this_thr = __kmp_threads[global_tid];
1176 serial_team = this_thr->th.th_serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001177
Jonathan Peyton30419822017-05-12 18:01:32 +00001178 /* utilize the serialized team held by this thread */
1179 KMP_DEBUG_ASSERT(serial_team);
1180 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001181
Jonathan Peyton30419822017-05-12 18:01:32 +00001182 if (__kmp_tasking_mode != tskm_immediate_exec) {
1183 KMP_DEBUG_ASSERT(
1184 this_thr->th.th_task_team ==
1185 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1186 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1187 NULL);
1188 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1189 "team %p, new task_team = NULL\n",
1190 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1191 this_thr->th.th_task_team = NULL;
1192 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001193
1194#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001195 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1196 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1197 proc_bind = proc_bind_false;
1198 } else if (proc_bind == proc_bind_default) {
1199 // No proc_bind clause was specified, so use the current value
1200 // of proc-bind-var for this parallel region.
1201 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1202 }
1203 // Reset for next parallel region
1204 this_thr->th.th_set_proc_bind = proc_bind_default;
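  // Illustrative example: with OMP_PROC_BIND=close and no proc_bind clause on
  // this construct, proc_bind arrives here as proc_bind_default and resolves
  // to proc_bind_close from proc-bind-var; an explicit proc_bind(spread)
  // clause would be kept as-is.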
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001205#endif /* OMP_40_ENABLED */
1206
Jonathan Peyton30419822017-05-12 18:01:32 +00001207 if (this_thr->th.th_team != serial_team) {
1208 // Nested level will be an index in the nested nthreads array
1209 int level = this_thr->th.th_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001210
Jonathan Peyton30419822017-05-12 18:01:32 +00001211 if (serial_team->t.t_serialized) {
1212 /* this serial team was already used
1213 TODO increase performance by making these locks more specific */
1214 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001215
Jonathan Peyton30419822017-05-12 18:01:32 +00001216 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001217
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001218#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001219 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001220#endif
1221
Jonathan Peyton30419822017-05-12 18:01:32 +00001222 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001223#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001224 ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001225#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001226#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001227 proc_bind,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001228#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001229 &this_thr->th.th_current_task->td_icvs,
1230 0 USE_NESTED_HOT_ARG(NULL));
1231 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1232 KMP_ASSERT(new_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001233
Jonathan Peyton30419822017-05-12 18:01:32 +00001234 /* setup new serialized team and install it */
1235 new_team->t.t_threads[0] = this_thr;
1236 new_team->t.t_parent = this_thr->th.th_team;
1237 serial_team = new_team;
1238 this_thr->th.th_serial_team = serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001239
Jonathan Peyton30419822017-05-12 18:01:32 +00001240 KF_TRACE(
1241 10,
1242 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1243 global_tid, serial_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001244
Jonathan Peyton30419822017-05-12 18:01:32 +00001245 /* TODO the above breaks the requirement that if we run out of resources,
1246 then we can still guarantee that serialized teams are ok, since we may
1247 need to allocate a new one */
1248 } else {
1249 KF_TRACE(
1250 10,
1251 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1252 global_tid, serial_team));
1253 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001254
Jonathan Peyton30419822017-05-12 18:01:32 +00001255 /* we have to initialize this serial team */
1256 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1257 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1258 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1259 serial_team->t.t_ident = loc;
1260 serial_team->t.t_serialized = 1;
1261 serial_team->t.t_nproc = 1;
1262 serial_team->t.t_parent = this_thr->th.th_team;
1263 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
1264 this_thr->th.th_team = serial_team;
1265 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001266
Jonathan Peyton30419822017-05-12 18:01:32 +00001267 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1268 this_thr->th.th_current_task));
1269 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1270 this_thr->th.th_current_task->td_flags.executing = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001271
Jonathan Peyton30419822017-05-12 18:01:32 +00001272 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001273
Jonathan Peyton30419822017-05-12 18:01:32 +00001274 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1275 implicit task for each serialized task represented by
1276 team->t.t_serialized? */
1277 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1278 &this_thr->th.th_current_task->td_parent->td_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001279
Jonathan Peyton30419822017-05-12 18:01:32 +00001280 // Thread value exists in the nested nthreads array for the next nested
1281 // level
1282 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1283 this_thr->th.th_current_task->td_icvs.nproc =
1284 __kmp_nested_nth.nth[level + 1];
1285 }
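    // For example (assumed setting): with OMP_NUM_THREADS="4,2",
    // __kmp_nested_nth.nth holds {4, 2}, so a serialized region entered at
    // level 0 sets nproc for the next nested level to 2 here.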
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001286
1287#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001288 if (__kmp_nested_proc_bind.used &&
1289 (level + 1 < __kmp_nested_proc_bind.used)) {
1290 this_thr->th.th_current_task->td_icvs.proc_bind =
1291 __kmp_nested_proc_bind.bind_types[level + 1];
1292 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001293#endif /* OMP_40_ENABLED */
1294
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001295#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00001296 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001297#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001298 this_thr->th.th_info.ds.ds_tid = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001299
Jonathan Peyton30419822017-05-12 18:01:32 +00001300 /* set thread cache values */
1301 this_thr->th.th_team_nproc = 1;
1302 this_thr->th.th_team_master = this_thr;
1303 this_thr->th.th_team_serialized = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001304
Jonathan Peyton30419822017-05-12 18:01:32 +00001305 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1306 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001307
Jonathan Peyton30419822017-05-12 18:01:32 +00001308 propagateFPControl(serial_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001309
Jonathan Peyton30419822017-05-12 18:01:32 +00001310 /* check if we need to allocate dispatch buffers stack */
1311 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1312 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1313 serial_team->t.t_dispatch->th_disp_buffer =
1314 (dispatch_private_info_t *)__kmp_allocate(
1315 sizeof(dispatch_private_info_t));
1316 }
1317 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001318
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001319#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001320 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1321 __ompt_team_assign_id(serial_team, ompt_parallel_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001322#endif
1323
Jonathan Peyton30419822017-05-12 18:01:32 +00001324 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001325
Jonathan Peyton30419822017-05-12 18:01:32 +00001326 } else {
1327 /* this serialized team is already being used,
1328 * that's fine, just add another nested level */
1329 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1330 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1331 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1332 ++serial_team->t.t_serialized;
1333 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001334
Jonathan Peyton30419822017-05-12 18:01:32 +00001335 // Nested level will be an index in the nested nthreads array
1336 int level = this_thr->th.th_team->t.t_level;
1337 // Thread value exists in the nested nthreads array for the next nested
1338 // level
1339 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1340 this_thr->th.th_current_task->td_icvs.nproc =
1341 __kmp_nested_nth.nth[level + 1];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001342 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001343 serial_team->t.t_level++;
1344 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1345 "of serial team %p to %d\n",
1346 global_tid, serial_team, serial_team->t.t_level));
1347
1348 /* allocate/push dispatch buffers stack */
1349 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1350 {
1351 dispatch_private_info_t *disp_buffer =
1352 (dispatch_private_info_t *)__kmp_allocate(
1353 sizeof(dispatch_private_info_t));
1354 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1355 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1356 }
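    // The per-level buffers form a singly linked stack through the ->next
    // field, so each nested serialized level gets its own private dispatch
    // buffer (the matching end of the serialized parallel is expected to pop it).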
1357 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1358
1359 KMP_MB();
1360 }
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001361#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001362 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001363#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001364
Jonathan Peyton30419822017-05-12 18:01:32 +00001365 if (__kmp_env_consistency_check)
1366 __kmp_push_parallel(global_tid, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001367}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001368
Jim Cownie5e8470a2013-09-27 10:38:44 +00001369/* most of the work for a fork */
1370/* return true if we really went parallel, false if serialized */
Jonathan Peyton30419822017-05-12 18:01:32 +00001371int __kmp_fork_call(ident_t *loc, int gtid,
1372 enum fork_context_e call_context, // Intel, GNU, ...
1373 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001374#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001375 void *unwrapped_task,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001376#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001377 microtask_t microtask, launch_t invoker,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001378/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001379#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001380 va_list *ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001381#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001382 va_list ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001383#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001384 ) {
1385 void **argv;
1386 int i;
1387 int master_tid;
1388 int master_this_cons;
1389 kmp_team_t *team;
1390 kmp_team_t *parent_team;
1391 kmp_info_t *master_th;
1392 kmp_root_t *root;
1393 int nthreads;
1394 int master_active;
1395 int master_set_numthreads;
1396 int level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001397#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001398 int active_level;
1399 int teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001400#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001401#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001402 kmp_hot_team_ptr_t **p_hot_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001403#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001404 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001405 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001406 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001407
Jonathan Peyton30419822017-05-12 18:01:32 +00001408 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1409 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1410 /* Some systems prefer the stack for the root thread(s) to start with */
1411 /* some gap from the parent stack to prevent false sharing. */
1412 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1413 /* These 2 lines below are so this does not get optimized out */
1414 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1415 __kmp_stkpadding += (short)((kmp_int64)dummy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001416 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001417
1418 /* initialize if needed */
Jonathan Peyton30419822017-05-12 18:01:32 +00001419 KMP_DEBUG_ASSERT(
1420 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1421 if (!TCR_4(__kmp_init_parallel))
1422 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001423
1424 /* setup current data */
Jonathan Peyton30419822017-05-12 18:01:32 +00001425 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
1426 // shutdown
1427 parent_team = master_th->th.th_team;
1428 master_tid = master_th->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001429 master_this_cons = master_th->th.th_local.this_construct;
Jonathan Peyton30419822017-05-12 18:01:32 +00001430 root = master_th->th.th_root;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001431 master_active = root->r.r_active;
1432 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001433
1434#if OMPT_SUPPORT
1435 ompt_parallel_id_t ompt_parallel_id;
1436 ompt_task_id_t ompt_task_id;
1437 ompt_frame_t *ompt_frame;
1438 ompt_task_id_t my_task_id;
1439 ompt_parallel_id_t my_parallel_id;
1440
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001441 if (ompt_enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001442 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1443 ompt_task_id = __ompt_get_task_id_internal(0);
1444 ompt_frame = __ompt_get_task_frame_internal(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001445 }
1446#endif
1447
Jim Cownie5e8470a2013-09-27 10:38:44 +00001448 // Nested level will be an index in the nested nthreads array
Jonathan Peyton30419822017-05-12 18:01:32 +00001449 level = parent_team->t.t_level;
1450 // used to launch non-serial teams even if nested is not allowed
1451 active_level = parent_team->t.t_active_level;
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001452#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00001453 // needed to check nesting inside the teams
1454 teams_level = master_th->th.th_teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001455#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001456#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001457 p_hot_teams = &master_th->th.th_hot_teams;
1458 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1459 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1460 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1461 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
Jonathan Peyton642688b2017-06-01 16:46:36 +00001462 // it is either actual or not needed (when active_level > 0)
1463 (*p_hot_teams)[0].hot_team_nth = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001464 }
1465#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001466
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001467#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001468 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001469 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001470 int team_size = master_set_numthreads;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001471
Jonathan Peyton30419822017-05-12 18:01:32 +00001472 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1473 ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
1474 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001475 }
1476#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001477
Jim Cownie5e8470a2013-09-27 10:38:44 +00001478 master_th->th.th_ident = loc;
1479
1480#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001481 if (master_th->th.th_teams_microtask && ap &&
1482 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1483 // AC: This is the start of a parallel region nested inside a teams construct.
1484 // The team is the actual (hot) team; all workers are ready at the fork barrier.
1485 // No lock is needed to initialize the team a bit, then release the workers.
1486 parent_team->t.t_ident = loc;
1487 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1488 parent_team->t.t_argc = argc;
1489 argv = (void **)parent_team->t.t_argv;
1490 for (i = argc - 1; i >= 0; --i)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001491/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001492#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001493 *argv++ = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001494#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001495 *argv++ = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001496#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001497 // Increment our nested depth level, but do not increase the serialization count
1498 if (parent_team == master_th->th.th_serial_team) {
1499 // AC: we are in serialized parallel
1500 __kmpc_serialized_parallel(loc, gtid);
1501 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1502 // AC: need this so that enquiry functions work
1503 // correctly; will restore at join time
1504 parent_team->t.t_serialized--;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001505#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001506 void *dummy;
1507 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001508
Jonathan Peyton30419822017-05-12 18:01:32 +00001509 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001510
Jonathan Peyton30419822017-05-12 18:01:32 +00001511 if (ompt_enabled) {
1512 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
1513 ompt_parallel_id);
1514 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1515 exit_runtime_p =
1516 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001517
Jonathan Peyton30419822017-05-12 18:01:32 +00001518 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
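        // For this serialized case no full kmp_team_t is created, so the
        // lightweight task team is linked in purely so that OMPT tools still
        // observe a parallel region and its implicit task.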
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001519
1520#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001521 /* OMPT implicit task begin */
1522 my_task_id = lw_taskteam.ompt_task_info.task_id;
1523 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
1524 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1525 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1526 my_parallel_id, my_task_id);
1527 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001528#endif
1529
Jonathan Peyton30419822017-05-12 18:01:32 +00001530 /* OMPT state */
1531 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1532 } else {
1533 exit_runtime_p = &dummy;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001534 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001535#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001536
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001537 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001538 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1539 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1540 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1541#if OMPT_SUPPORT
1542 ,
1543 exit_runtime_p
1544#endif
1545 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001546 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001547
Jonathan Peyton30419822017-05-12 18:01:32 +00001548#if OMPT_SUPPORT
1549 *exit_runtime_p = NULL;
1550 if (ompt_enabled) {
1551#if OMPT_TRACE
1552 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001553
Jonathan Peyton30419822017-05-12 18:01:32 +00001554 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1555 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1556 ompt_parallel_id, ompt_task_id);
1557 }
1558
1559 __ompt_lw_taskteam_unlink(master_th);
1560 // reset clear the task id only after unlinking the task
1561 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1562#endif
1563
1564 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1565 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1566 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
1567 }
1568 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1569 }
1570#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001571 return TRUE;
Jonathan Peyton30419822017-05-12 18:01:32 +00001572 }
1573
1574 parent_team->t.t_pkfn = microtask;
1575#if OMPT_SUPPORT
1576 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1577#endif
1578 parent_team->t.t_invoke = invoker;
1579 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
1580 parent_team->t.t_active_level++;
1581 parent_team->t.t_level++;
1582
1583 /* Change number of threads in the team if requested */
1584 if (master_set_numthreads) { // The parallel has num_threads clause
1585 if (master_set_numthreads < master_th->th.th_teams_size.nth) {
1586 // AC: can only reduce the number of threads dynamically, can't increase
1587 kmp_info_t **other_threads = parent_team->t.t_threads;
1588 parent_team->t.t_nproc = master_set_numthreads;
1589 for (i = 0; i < master_set_numthreads; ++i) {
1590 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1591 }
1592 // Keep extra threads hot in the team for possible next parallels
1593 }
1594 master_th->th.th_set_nproc = 0;
1595 }
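      // For instance (assumed sizes): if the enclosing teams construct gave
      // this team 8 threads and the nested parallel specifies num_threads(4),
      // the visible team size drops to 4; num_threads(16) would be ignored
      // because the team cannot grow here.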
1596
1597#if USE_DEBUGGER
1598 if (__kmp_debugging) { // Let debugger override number of threads.
1599 int nth = __kmp_omp_num_threads(loc);
Jonathan Peyton642688b2017-06-01 16:46:36 +00001600 if (nth > 0) { // 0 means debugger doesn't want to change num threads
Jonathan Peyton30419822017-05-12 18:01:32 +00001601 master_set_numthreads = nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001602 }
1603 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001604#endif
1605
1606 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
1607 "master_th=%p, gtid=%d\n",
1608 root, parent_team, master_th, gtid));
1609 __kmp_internal_fork(loc, gtid, parent_team);
1610 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
1611 "master_th=%p, gtid=%d\n",
1612 root, parent_team, master_th, gtid));
1613
1614 /* Invoke microtask for MASTER thread */
1615 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
1616 parent_team->t.t_id, parent_team->t.t_pkfn));
1617
1618 {
1619 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1620 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1621 if (!parent_team->t.t_invoke(gtid)) {
1622 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
1623 }
1624 }
1625 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
1626 parent_team->t.t_id, parent_team->t.t_pkfn));
1627 KMP_MB(); /* Flush all pending memory write invalidates. */
1628
1629 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
1630
1631 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001632 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001633#endif /* OMP_40_ENABLED */
1634
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001635#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001636 if (__kmp_tasking_mode != tskm_immediate_exec) {
1637 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1638 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001640#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001641
Jonathan Peyton30419822017-05-12 18:01:32 +00001642 if (parent_team->t.t_active_level >=
1643 master_th->th.th_current_task->td_icvs.max_active_levels) {
1644 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001645 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001646#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001647 int enter_teams = ((ap == NULL && active_level == 0) ||
1648 (ap && teams_level > 0 && teams_level == level));
Andrey Churbanov92effc42015-08-18 10:08:27 +00001649#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001650 nthreads =
1651 master_set_numthreads
1652 ? master_set_numthreads
1653 : get__nproc_2(
1654 parent_team,
1655 master_tid); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001656
Jonathan Peyton30419822017-05-12 18:01:32 +00001657 // Check if we need to take forkjoin lock? (no need for serialized
1658 // parallel out of teams construct). This code moved here from
1659 // __kmp_reserve_threads() to speedup nested serialized parallels.
1660 if (nthreads > 1) {
1661 if ((!get__nested(master_th) && (root->r.r_in_parallel
Andrey Churbanov92effc42015-08-18 10:08:27 +00001662#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001663 && !enter_teams
Andrey Churbanov92effc42015-08-18 10:08:27 +00001664#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001665 )) ||
1666 (__kmp_library == library_serial)) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001667 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
1668 " threads\n",
1669 gtid, nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +00001670 nthreads = 1;
Andrey Churbanov92effc42015-08-18 10:08:27 +00001671 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001672 }
1673 if (nthreads > 1) {
1674 /* determine how many new threads we can use */
1675 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jonathan Peyton30419822017-05-12 18:01:32 +00001676 nthreads = __kmp_reserve_threads(
1677 root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001678#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001679 /* AC: If we execute teams from a parallel region (on host), then
1680 teams should be created, but each can only have 1 thread if
1681 nesting is disabled. If teams is called from a serial region, then
1682 teams and their threads should be created regardless of the
1683 nesting setting. */
1684 ,
1685 enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001686#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001687 );
1688 if (nthreads == 1) {
1689 // Free lock for single thread execution here; for multi-thread
1690 // execution it will be freed later after team of threads created
1691 // and initialized
1692 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Andrey Churbanov92effc42015-08-18 10:08:27 +00001693 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001694 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001696 KMP_DEBUG_ASSERT(nthreads > 0);
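    // Worked example (assumed ICV setting): with OMP_MAX_ACTIVE_LEVELS=1, an
    // inner parallel region sees parent_team->t.t_active_level == 1, which is
    // not below max_active_levels, so nthreads is forced to 1 and the region
    // is serialized below.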
Jim Cownie5e8470a2013-09-27 10:38:44 +00001697
Jonathan Peyton30419822017-05-12 18:01:32 +00001698 // If we temporarily changed the set number of threads then restore it now
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001699 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001700
Jim Cownie5e8470a2013-09-27 10:38:44 +00001701 /* create a serialized parallel region? */
Jonathan Peyton30419822017-05-12 18:01:32 +00001702 if (nthreads == 1) {
1703/* josh todo: hypothetical question: what do we do for OS X*? */
1704#if KMP_OS_LINUX && \
1705 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1706 void *args[argc];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001708 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1709#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1710 KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711
Jonathan Peyton30419822017-05-12 18:01:32 +00001712 KA_TRACE(20,
1713 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001714
Jonathan Peyton30419822017-05-12 18:01:32 +00001715 __kmpc_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001716
Jonathan Peyton30419822017-05-12 18:01:32 +00001717 if (call_context == fork_context_intel) {
1718 /* TODO this sucks, use the compiler itself to pass args! :) */
1719 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001720#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001721 if (!ap) {
1722 // revert change made in __kmpc_serialized_parallel()
1723 master_th->th.th_serial_team->t.t_level--;
1724// Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001725
1726#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001727 void *dummy;
1728 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001729
Jonathan Peyton30419822017-05-12 18:01:32 +00001730 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001731
Jonathan Peyton30419822017-05-12 18:01:32 +00001732 if (ompt_enabled) {
1733 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1734 unwrapped_task, ompt_parallel_id);
1735 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1736 exit_runtime_p =
1737 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001738
Jonathan Peyton30419822017-05-12 18:01:32 +00001739 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001740
1741#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001742 my_task_id = lw_taskteam.ompt_task_info.task_id;
1743 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1744 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1745 ompt_parallel_id, my_task_id);
1746 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001747#endif
1748
Jonathan Peyton30419822017-05-12 18:01:32 +00001749 /* OMPT state */
1750 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1751 } else {
1752 exit_runtime_p = &dummy;
1753 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001754#endif
1755
Jonathan Peyton30419822017-05-12 18:01:32 +00001756 {
1757 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1758 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1759 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1760 parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001761#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001762 ,
1763 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001764#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001765 );
1766 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001767
1768#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001769 *exit_runtime_p = NULL;
1770 if (ompt_enabled) {
1771 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001772
1773#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001774 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1775 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1776 ompt_parallel_id, ompt_task_id);
1777 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001778#endif
1779
Jonathan Peyton30419822017-05-12 18:01:32 +00001780 __ompt_lw_taskteam_unlink(master_th);
1781 // reset/clear the task id only after unlinking the task
1782 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001783
Jonathan Peyton30419822017-05-12 18:01:32 +00001784 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1785 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1786 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
1787 }
1788 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1789 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001790#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001791 } else if (microtask == (microtask_t)__kmp_teams_master) {
1792 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1793 master_th->th.th_serial_team);
1794 team = master_th->th.th_team;
1795 // team->t.t_pkfn = microtask;
1796 team->t.t_invoke = invoker;
1797 __kmp_alloc_argv_entries(argc, team, TRUE);
1798 team->t.t_argc = argc;
1799 argv = (void **)team->t.t_argv;
1800 if (ap) {
1801 for (i = argc - 1; i >= 0; --i)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001802// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001803#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001804 *argv++ = va_arg(*ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001805#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001806 *argv++ = va_arg(ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001807#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001808 } else {
1809 for (i = 0; i < argc; ++i)
1810 // Get args from parent team for teams construct
1811 argv[i] = parent_team->t.t_argv[i];
1812 }
1813 // AC: revert change made in __kmpc_serialized_parallel()
1814 // because initial code in teams should have level=0
1815 team->t.t_level--;
1816 // AC: call special invoker for outer "parallel" of teams construct
1817 {
1818 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1819 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1820 invoker(gtid);
1821 }
1822 } else {
1823#endif /* OMP_40_ENABLED */
1824 argv = args;
1825 for (i = argc - 1; i >= 0; --i)
1826// TODO: revert workaround for Intel(R) 64 tracker #96
1827#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1828 *argv++ = va_arg(*ap, void *);
1829#else
1830 *argv++ = va_arg(ap, void *);
1831#endif
1832 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001833
1834#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001835 void *dummy;
1836 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001837
Jonathan Peyton30419822017-05-12 18:01:32 +00001838 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001839
Jonathan Peyton30419822017-05-12 18:01:32 +00001840 if (ompt_enabled) {
1841 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1842 unwrapped_task, ompt_parallel_id);
1843 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1844 exit_runtime_p =
1845 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001846
Jonathan Peyton30419822017-05-12 18:01:32 +00001847 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001848
1849#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001850 /* OMPT implicit task begin */
1851 my_task_id = lw_taskteam.ompt_task_info.task_id;
1852 my_parallel_id = ompt_parallel_id;
1853 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1854 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1855 my_parallel_id, my_task_id);
1856 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001857#endif
1858
Jonathan Peyton30419822017-05-12 18:01:32 +00001859 /* OMPT state */
1860 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1861 } else {
1862 exit_runtime_p = &dummy;
1863 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001864#endif
1865
Jonathan Peyton30419822017-05-12 18:01:32 +00001866 {
1867 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1868 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1869 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001870#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001871 ,
1872 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001873#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001874 );
1875 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001876
1877#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001878 *exit_runtime_p = NULL;
1879 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001880#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001881 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001882
Jonathan Peyton30419822017-05-12 18:01:32 +00001883 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1884 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1885 my_parallel_id, my_task_id);
1886 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001887#endif
1888
Jonathan Peyton30419822017-05-12 18:01:32 +00001889 __ompt_lw_taskteam_unlink(master_th);
1890 // reset/clear the task id only after unlinking the task
1891 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001892
Jonathan Peyton30419822017-05-12 18:01:32 +00001893 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1894 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1895 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
1896 }
1897 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1898 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001899#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001901 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001902#endif /* OMP_40_ENABLED */
1903 } else if (call_context == fork_context_gnu) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001904#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001905 ompt_lw_taskteam_t *lwt =
1906 (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
1907 __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task,
1908 ompt_parallel_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001909
Jonathan Peyton30419822017-05-12 18:01:32 +00001910 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1911 lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
1912 __ompt_lw_taskteam_link(lwt, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001913#endif
1914
Jonathan Peyton30419822017-05-12 18:01:32 +00001915 // we were called from GNU native code
1916 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917 return FALSE;
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001918 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001919 KMP_ASSERT2(call_context < fork_context_last,
1920 "__kmp_fork_call: unknown fork_context parameter");
1921 }
1922
1923 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
1924 KMP_MB();
1925 return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001926 }
1927
Jim Cownie5e8470a2013-09-27 10:38:44 +00001928 // GEH: only modify the executing flag in the case when not serialized
1929 // serialized case is handled in kmpc_serialized_parallel
Jonathan Peyton30419822017-05-12 18:01:32 +00001930 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
1931 "curtask=%p, curtask_max_aclevel=%d\n",
1932 parent_team->t.t_active_level, master_th,
1933 master_th->th.th_current_task,
1934 master_th->th.th_current_task->td_icvs.max_active_levels));
1935 // TODO: GEH - cannot do this assertion because root thread not set up as
1936 // executing
Jim Cownie5e8470a2013-09-27 10:38:44 +00001937 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1938 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001939
1940#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001941 if (!master_th->th.th_teams_microtask || level > teams_level)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001942#endif /* OMP_40_ENABLED */
1943 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001944 /* Increment our nested depth level */
1945 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001946 }
1947
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001950 if ((level + 1 < __kmp_nested_nth.used) &&
1951 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
1952 nthreads_icv = __kmp_nested_nth.nth[level + 1];
1953 } else {
1954 nthreads_icv = 0; // don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 }
1956
1957#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001959 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jonathan Peyton30419822017-05-12 18:01:32 +00001960 kmp_proc_bind_t proc_bind_icv =
1961 proc_bind_default; // proc_bind_default means don't update
1962 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1963 proc_bind = proc_bind_false;
1964 } else {
1965 if (proc_bind == proc_bind_default) {
1966 // No proc_bind clause specified; use current proc-bind-var for this
1967 // parallel region
1968 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1969 }
1970 /* else: The proc_bind policy was specified explicitly on parallel clause.
1971 This overrides proc-bind-var for this parallel region, but does not
1972 change proc-bind-var. */
1973 // Figure the value of proc-bind-var for the child threads.
1974 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1975 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1976 master_th->th.th_current_task->td_icvs.proc_bind)) {
1977 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1978 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001979 }
1980
Jim Cownie5e8470a2013-09-27 10:38:44 +00001981 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982 master_th->th.th_set_proc_bind = proc_bind_default;
1983#endif /* OMP_40_ENABLED */
1984
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001985 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001986#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001987 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001989 ) {
1990 kmp_internal_control_t new_icvs;
1991 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
1992 new_icvs.next = NULL;
1993 if (nthreads_icv > 0) {
1994 new_icvs.nproc = nthreads_icv;
1995 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001996
1997#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001998 if (proc_bind_icv != proc_bind_default) {
1999 new_icvs.proc_bind = proc_bind_icv;
2000 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002001#endif /* OMP_40_ENABLED */
2002
Jonathan Peyton30419822017-05-12 18:01:32 +00002003 /* allocate a new parallel team */
2004 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2005 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002006#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002007 ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002008#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002009#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002010 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002012 &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002013 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002014 /* allocate a new parallel team */
2015 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2016 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002017#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002018 ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002019#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002020#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002021 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002023 &master_th->th.th_current_task->td_icvs,
2024 argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002025 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002026 KF_TRACE(
2027 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002028
2029 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002030 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2031 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2032 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2033 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2034 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002035#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002036 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002037#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002038 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2039// TODO: parent_team->t.t_level == INT_MAX ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00002040#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002041 if (!master_th->th.th_teams_microtask || level > teams_level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002043 int new_level = parent_team->t.t_level + 1;
2044 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2045 new_level = parent_team->t.t_active_level + 1;
2046 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002047#if OMP_40_ENABLED
2048 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002049 // AC: Do not increase parallel level at start of the teams construct
2050 int new_level = parent_team->t.t_level;
2051 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2052 new_level = parent_team->t.t_active_level;
2053 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002054 }
2055#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002056 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton30419822017-05-12 18:01:32 +00002057 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
2058 team->t.t_sched.chunk != new_sched.chunk)
2059 team->t.t_sched =
2060 new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002061
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002062#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002063 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002064#endif
2065
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002066 // Update the floating point rounding in the team if required.
2067 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002068
Jonathan Peyton30419822017-05-12 18:01:32 +00002069 if (__kmp_tasking_mode != tskm_immediate_exec) {
2070 // Set master's task team to team's task team. Unless this is a hot team, it
2071 // should be NULL.
Jonathan Peyton30419822017-05-12 18:01:32 +00002072 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2073 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peyton30419822017-05-12 18:01:32 +00002074 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
2075 "%p, new task_team %p / team %p\n",
2076 __kmp_gtid_from_thread(master_th),
2077 master_th->th.th_task_team, parent_team,
2078 team->t.t_task_team[master_th->th.th_task_state], team));
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002079
Jonathan Peyton30419822017-05-12 18:01:32 +00002080 if (active_level || master_th->th.th_task_team) {
2081 // Take a memo of master's task_state
2082 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2083 if (master_th->th.th_task_state_top >=
2084 master_th->th.th_task_state_stack_sz) { // increase size
2085 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2086 kmp_uint8 *old_stack, *new_stack;
2087 kmp_uint32 i;
2088 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2089 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2090 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2091 }
2092 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2093 ++i) { // zero-init rest of stack
2094 new_stack[i] = 0;
2095 }
2096 old_stack = master_th->th.th_task_state_memo_stack;
2097 master_th->th.th_task_state_memo_stack = new_stack;
2098 master_th->th.th_task_state_stack_sz = new_size;
2099 __kmp_free(old_stack);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002100 }
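        // The memo stack grows geometrically (doubling), so deeply nested
        // regions cause O(log depth) reallocations rather than one per level.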
Jonathan Peyton30419822017-05-12 18:01:32 +00002101 // Store master's task_state on stack
2102 master_th->th
2103 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2104 master_th->th.th_task_state;
2105 master_th->th.th_task_state_top++;
2106#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton642688b2017-06-01 16:46:36 +00002107 if (team == master_th->th.th_hot_teams[active_level].hot_team) {
2108 // Restore master's nested state if nested hot team
Jonathan Peyton30419822017-05-12 18:01:32 +00002109 master_th->th.th_task_state =
2110 master_th->th
2111 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2112 } else {
2113#endif
2114 master_th->th.th_task_state = 0;
2115#if KMP_NESTED_HOT_TEAMS
2116 }
2117#endif
2118 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002119#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002120 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2121 (team == root->r.r_hot_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002122#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002123 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002124
Jonathan Peyton30419822017-05-12 18:01:32 +00002125 KA_TRACE(
2126 20,
2127 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2128 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2129 team->t.t_nproc));
2130 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2131 (team->t.t_master_tid == 0 &&
2132 (team->t.t_parent == root->r.r_root_team ||
2133 team->t.t_parent->t.t_serialized)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134 KMP_MB();
2135
2136 /* now, setup the arguments */
Jonathan Peyton30419822017-05-12 18:01:32 +00002137 argv = (void **)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002138#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002139 if (ap) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002140#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002141 for (i = argc - 1; i >= 0; --i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002142// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002143#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00002144 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002145#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002146 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002147#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002148 KMP_CHECK_UPDATE(*argv, new_argv);
2149 argv++;
2150 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002151#if OMP_40_ENABLED
2152 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002153 for (i = 0; i < argc; ++i) {
2154 // Get args from parent team for teams construct
2155 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2156 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002157 }
2158#endif /* OMP_40_ENABLED */
2159
2160 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002161 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002162 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
Jonathan Peyton30419822017-05-12 18:01:32 +00002163 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002164
Jonathan Peyton30419822017-05-12 18:01:32 +00002165 __kmp_fork_team_threads(root, team, master_th, gtid);
2166 __kmp_setup_icv_copy(team, nthreads,
2167 &master_th->th.th_current_task->td_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002168
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002169#if OMPT_SUPPORT
2170 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2171#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002172
Jonathan Peyton30419822017-05-12 18:01:32 +00002173 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002174
Jim Cownie5e8470a2013-09-27 10:38:44 +00002175#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002176 if (team->t.t_active_level == 1 // only report frames at level 1
2177#if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002178 && !master_th->th.th_teams_microtask // not in teams construct
Jonathan Peyton30419822017-05-12 18:01:32 +00002179#endif /* OMP_40_ENABLED */
2180 ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002181#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002182 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2183 (__kmp_forkjoin_frames_mode == 3 ||
2184 __kmp_forkjoin_frames_mode == 1)) {
2185 kmp_uint64 tmp_time = 0;
2186 if (__itt_get_timestamp_ptr)
2187 tmp_time = __itt_get_timestamp();
2188 // Internal fork - report frame begin
2189 master_th->th.th_frame_time = tmp_time;
2190 if (__kmp_forkjoin_frames_mode == 3)
2191 team->t.t_region_time = tmp_time;
Jonathan Peyton642688b2017-06-01 16:46:36 +00002192 } else
2193// only one notification scheme (either "submit" or "forking/joined", not both)
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002194#endif /* USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00002195 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2196 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2197 // Mark start of "parallel" region for VTune.
2198 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2199 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002200 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002201#endif /* USE_ITT_BUILD */
2202
2203 /* now go on and do the work */
Jonathan Peyton30419822017-05-12 18:01:32 +00002204 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002205 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00002206 KF_TRACE(10,
2207 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2208 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002209
2210#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002211 if (__itt_stack_caller_create_ptr) {
2212 team->t.t_stack_id =
2213 __kmp_itt_stack_caller_create(); // create new stack stitching id
2214 // before entering fork barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002215 }
2216#endif /* USE_ITT_BUILD */
2217
2218#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00002219 // AC: skip __kmp_internal_fork at teams construct, let only master
2220 // threads execute
2221 if (ap)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002222#endif /* OMP_40_ENABLED */
2223 {
Jonathan Peyton30419822017-05-12 18:01:32 +00002224 __kmp_internal_fork(loc, gtid, team);
2225 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2226 "master_th=%p, gtid=%d\n",
2227 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002228 }
2229
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002230 if (call_context == fork_context_gnu) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002231 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2232 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002233 }
2234
2235 /* Invoke microtask for MASTER thread */
Jonathan Peyton30419822017-05-12 18:01:32 +00002236 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2237 team->t.t_id, team->t.t_pkfn));
2238 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002239
Jonathan Peyton30419822017-05-12 18:01:32 +00002240 {
2241 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2242 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
2243 if (!team->t.t_invoke(gtid)) {
2244 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jim Cownie5e8470a2013-09-27 10:38:44 +00002245 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002246 }
2247 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2248 team->t.t_id, team->t.t_pkfn));
2249 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002250
Jonathan Peyton30419822017-05-12 18:01:32 +00002251 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002252
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002253#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002254 if (ompt_enabled) {
2255 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2256 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002257#endif
2258
Jonathan Peyton30419822017-05-12 18:01:32 +00002259 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002260}
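// Illustrative sketch (user code, not library internals): how a plain parallel
// region reaches the path above. Assuming the usual compiler lowering, a
// directive such as
//
//   #pragma omp parallel
//   { do_work(); }                 // do_work() is a hypothetical user function
//
// is compiled into a call to the __kmpc_fork_call() entry point, which lands in
// __kmp_fork_call(); the master then runs the microtask itself through
// team->t.t_invoke(gtid) as seen above, and the matching __kmp_join_call()
// below tears the region down.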
2261
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002262#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002263static inline void __kmp_join_restore_state(kmp_info_t *thread,
2264 kmp_team_t *team) {
2265 // restore state outside the region
2266 thread->th.ompt_thread_info.state =
2267 ((team->t.t_serialized) ? ompt_state_work_serial
2268 : ompt_state_work_parallel);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002269}
2270
Jonathan Peyton30419822017-05-12 18:01:32 +00002271static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
2272 ompt_parallel_id_t parallel_id,
2273 fork_context_e fork_context) {
2274 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2275 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
2276 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
2277 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
2278 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002279
Jonathan Peyton30419822017-05-12 18:01:32 +00002280 task_info->frame.reenter_runtime_frame = NULL;
2281 __kmp_join_restore_state(thread, team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002282}
2283#endif
2284
Jonathan Peyton30419822017-05-12 18:01:32 +00002285void __kmp_join_call(ident_t *loc, int gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002286#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002287 ,
2288 enum fork_context_e fork_context
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002289#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002290#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002291 ,
2292 int exit_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00002293#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002294 ) {
2295 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2296 kmp_team_t *team;
2297 kmp_team_t *parent_team;
2298 kmp_info_t *master_th;
2299 kmp_root_t *root;
2300 int master_active;
2301 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002302
Jonathan Peyton30419822017-05-12 18:01:32 +00002303 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002304
Jonathan Peyton30419822017-05-12 18:01:32 +00002305 /* setup current data */
2306 master_th = __kmp_threads[gtid];
2307 root = master_th->th.th_root;
2308 team = master_th->th.th_team;
2309 parent_team = team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002310
Jonathan Peyton30419822017-05-12 18:01:32 +00002311 master_th->th.th_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002312
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002313#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002314 if (ompt_enabled) {
2315 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2316 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002317#endif
2318
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002319#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00002320 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2321 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2322 "th_task_team = %p\n",
2323 __kmp_gtid_from_thread(master_th), team,
2324 team->t.t_task_team[master_th->th.th_task_state],
2325 master_th->th.th_task_team));
2326 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2327 team->t.t_task_team[master_th->th.th_task_state]);
2328 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002329#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002330
Jonathan Peyton30419822017-05-12 18:01:32 +00002331 if (team->t.t_serialized) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002332#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002333 if (master_th->th.th_teams_microtask) {
2334 // We are in teams construct
2335 int level = team->t.t_level;
2336 int tlevel = master_th->th.th_teams_level;
2337 if (level == tlevel) {
2338 // AC: we haven't incremented it earlier at start of teams construct,
2339 // so do it here - at the end of teams construct
2340 team->t.t_level++;
2341 } else if (level == tlevel + 1) {
2342 // AC: we are exiting parallel inside teams, need to increment
2343 // serialization in order to restore it in the next call to
2344 // __kmpc_end_serialized_parallel
2345 team->t.t_serialized++;
2346 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002347 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002348#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002349 __kmpc_end_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002350
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002351#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002352 if (ompt_enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002353 __kmp_join_restore_state(master_th, parent_team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002354 }
2355#endif
2356
Jonathan Peyton30419822017-05-12 18:01:32 +00002357 return;
2358 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002359
Jonathan Peyton30419822017-05-12 18:01:32 +00002360 master_active = team->t.t_master_active;
2361
2362#if OMP_40_ENABLED
2363 if (!exit_teams)
2364#endif /* OMP_40_ENABLED */
2365 {
2366 // AC: No barrier for internal teams at exit from teams construct.
2367 // But there is barrier for external team (league).
2368 __kmp_internal_join(loc, gtid, team);
2369 }
2370#if OMP_40_ENABLED
2371 else {
2372 master_th->th.th_task_state =
2373 0; // AC: no tasking in teams (out of any parallel)
2374 }
2375#endif /* OMP_40_ENABLED */
2376
2377 KMP_MB();
2378
2379#if OMPT_SUPPORT
2380 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2381#endif
2382
2383#if USE_ITT_BUILD
2384 if (__itt_stack_caller_create_ptr) {
2385 __kmp_itt_stack_caller_destroy(
2386 (__itt_caller)team->t
2387 .t_stack_id); // destroy the stack stitching id after join barrier
2388 }
2389
2390 // Mark end of "parallel" region for VTune.
2391 if (team->t.t_active_level == 1
2392#if OMP_40_ENABLED
2393 && !master_th->th.th_teams_microtask /* not in teams construct */
2394#endif /* OMP_40_ENABLED */
2395 ) {
2396 master_th->th.th_ident = loc;
2397 // only one notification scheme (either "submit" or "forking/joined", not
2398 // both)
2399 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2400 __kmp_forkjoin_frames_mode == 3)
2401 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2402 master_th->th.th_frame_time, 0, loc,
2403 master_th->th.th_team_nproc, 1);
2404 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2405 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2406 __kmp_itt_region_joined(gtid);
2407 } // active_level == 1
2408#endif /* USE_ITT_BUILD */
2409
2410#if OMP_40_ENABLED
2411 if (master_th->th.th_teams_microtask && !exit_teams &&
2412 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2413 team->t.t_level == master_th->th.th_teams_level + 1) {
2414 // AC: We need to leave the team structure intact at the end of parallel
2415 // inside the teams construct, so that the same (hot) team is reused by the
2416 // next parallel region; only the nesting levels are adjusted here
2417
2418 /* Decrement our nested depth level */
2419 team->t.t_level--;
2420 team->t.t_active_level--;
2421 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
2422
2423 /* Restore number of threads in the team if needed */
2424 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2425 int old_num = master_th->th.th_team_nproc;
2426 int new_num = master_th->th.th_teams_size.nth;
2427 kmp_info_t **other_threads = team->t.t_threads;
2428 team->t.t_nproc = new_num;
2429 for (i = 0; i < old_num; ++i) {
2430 other_threads[i]->th.th_team_nproc = new_num;
2431 }
2432 // Adjust the state of the team's unused threads
2433 for (i = old_num; i < new_num; ++i) {
2434 // Re-initialize thread's barrier data.
2435 int b;
2436 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2437 for (b = 0; b < bs_last_barrier; ++b) {
2438 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2439 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2440#if USE_DEBUGGER
2441 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2442#endif
2443 }
2444 if (__kmp_tasking_mode != tskm_immediate_exec) {
2445 // Synchronize thread's task state
2446 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2447 }
2448 }
2449 }
2450
2451#if OMPT_SUPPORT
2452 if (ompt_enabled) {
2453 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
2454 }
2455#endif
2456
2457 return;
2458 }
2459#endif /* OMP_40_ENABLED */
2460
2461 /* do cleanup and restore the parent team */
2462 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2463 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2464
2465 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2466
2467 /* jc: The following lock has instructions with REL and ACQ semantics,
2468 separating the parallel user code called in this parallel region
2469 from the serial user code called after this function returns. */
2470 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2471
2472#if OMP_40_ENABLED
2473 if (!master_th->th.th_teams_microtask ||
2474 team->t.t_level > master_th->th.th_teams_level)
2475#endif /* OMP_40_ENABLED */
2476 {
2477 /* Decrement our nested depth level */
2478 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
2479 }
2480 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2481
2482#if OMPT_SUPPORT && OMPT_TRACE
2483 if (ompt_enabled) {
2484 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2485 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2486 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2487 parallel_id, task_info->task_id);
2488 }
2489 task_info->frame.exit_runtime_frame = NULL;
2490 task_info->task_id = 0;
2491 }
2492#endif
2493
2494 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2495 master_th, team));
2496 __kmp_pop_current_task_from_thread(master_th);
2497
2498#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2499 // Restore master thread's partition.
2500 master_th->th.th_first_place = team->t.t_first_place;
2501 master_th->th.th_last_place = team->t.t_last_place;
2502#endif /* OMP_40_ENABLED */
2503
2504 updateHWFPControl(team);
2505
2506 if (root->r.r_active != master_active)
2507 root->r.r_active = master_active;
2508
2509 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2510 master_th)); // this will free worker threads
2511
2512 /* This race was fun to find. Keep the following inside the critical region;
2513 otherwise assertions may fail occasionally because the old team may be
2514 reallocated and the hierarchy appears inconsistent. Running it outside the
2515 lock is actually safe and causes no bugs beyond those assertion failures,
2516 and it is only one deref & assign, so keep it in the critical region. */
2517 master_th->th.th_team = parent_team;
2518 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2519 master_th->th.th_team_master = parent_team->t.t_threads[0];
2520 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2521
2522 /* restore serialized team, if need be */
2523 if (parent_team->t.t_serialized &&
2524 parent_team != master_th->th.th_serial_team &&
2525 parent_team != root->r.r_root_team) {
2526 __kmp_free_team(root,
2527 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2528 master_th->th.th_serial_team = parent_team;
2529 }
2530
2531 if (__kmp_tasking_mode != tskm_immediate_exec) {
2532 if (master_th->th.th_task_state_top >
2533 0) { // Restore task state from memo stack
2534 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2535 // Remember master's state if we re-use this nested hot team
2536 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2537 master_th->th.th_task_state;
2538 --master_th->th.th_task_state_top; // pop
2539 // Now restore state at this level
2540 master_th->th.th_task_state =
2541 master_th->th
2542 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2543 }
2544 // Copy the task team from the parent team to the master thread
2545 master_th->th.th_task_team =
2546 parent_team->t.t_task_team[master_th->th.th_task_state];
2547 KA_TRACE(20,
2548 ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2549 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2550 parent_team));
2551 }
2552
2553 // TODO: GEH - cannot do this assertion because root thread not set up as
2554 // executing
2555 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2556 master_th->th.th_current_task->td_flags.executing = 1;
2557
2558 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2559
2560#if OMPT_SUPPORT
2561 if (ompt_enabled) {
2562 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
2563 }
2564#endif
2565
2566 KMP_MB();
2567 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2568}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002569
2570/* Check whether we should push an internal control record onto the
2571 serial team stack. If so, do it. */
Jonathan Peyton30419822017-05-12 18:01:32 +00002572void __kmp_save_internal_controls(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002573
Jonathan Peyton30419822017-05-12 18:01:32 +00002574 if (thread->th.th_team != thread->th.th_serial_team) {
2575 return;
2576 }
2577 if (thread->th.th_team->t.t_serialized > 1) {
2578 int push = 0;
2579
2580 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2581 push = 1;
2582 } else {
2583 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2584 thread->th.th_team->t.t_serialized) {
2585 push = 1;
2586 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002587 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002588 if (push) { /* push a record on the serial team's stack */
2589 kmp_internal_control_t *control =
2590 (kmp_internal_control_t *)__kmp_allocate(
2591 sizeof(kmp_internal_control_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592
Jonathan Peyton30419822017-05-12 18:01:32 +00002593 copy_icvs(control, &thread->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002594
Jonathan Peyton30419822017-05-12 18:01:32 +00002595 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002596
Jonathan Peyton30419822017-05-12 18:01:32 +00002597 control->next = thread->th.th_team->t.t_control_stack_top;
2598 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002599 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002600 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002601}
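// Illustrative note: a control record is pushed above only when the calling
// thread is executing on its serial team at a serialization depth greater than
// one, and at most once per depth. The setters below (__kmp_set_num_threads,
// __kmp_set_max_active_levels, __kmp_set_schedule) call this routine before
// changing an ICV, so a change made inside nested serialized regions can be
// rolled back when the inner region ends. A hypothetical user-level trigger,
// assuming both regions end up serialized:
//
//   #pragma omp parallel if(0)          // serialized outer region
//   {
//     #pragma omp parallel if(0)        // serialized nested region
//     { omp_set_num_threads(2); }       // current ICVs are saved before the change
//   }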
2602
2603/* Changes set_nproc */
Jonathan Peyton30419822017-05-12 18:01:32 +00002604void __kmp_set_num_threads(int new_nth, int gtid) {
2605 kmp_info_t *thread;
2606 kmp_root_t *root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002607
Jonathan Peyton30419822017-05-12 18:01:32 +00002608 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2609 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002610
Jonathan Peyton30419822017-05-12 18:01:32 +00002611 if (new_nth < 1)
2612 new_nth = 1;
2613 else if (new_nth > __kmp_max_nth)
2614 new_nth = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002615
Jonathan Peyton30419822017-05-12 18:01:32 +00002616 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2617 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618
Jonathan Peyton30419822017-05-12 18:01:32 +00002619 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002620
Jonathan Peyton30419822017-05-12 18:01:32 +00002621 set__nproc(thread, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002622
Jonathan Peyton30419822017-05-12 18:01:32 +00002623 // If this omp_set_num_threads() call will cause the hot team size to be
2624 // reduced (in the absence of a num_threads clause), then reduce it now,
2625 // rather than waiting for the next parallel region.
2626 root = thread->th.th_root;
2627 if (__kmp_init_parallel && (!root->r.r_active) &&
2628 (root->r.r_hot_team->t.t_nproc > new_nth)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002629#if KMP_NESTED_HOT_TEAMS
2630 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2631#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002632 ) {
2633 kmp_team_t *hot_team = root->r.r_hot_team;
2634 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002635
Jonathan Peyton30419822017-05-12 18:01:32 +00002636 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637
Jonathan Peyton30419822017-05-12 18:01:32 +00002638 // Release the extra threads we don't need any more.
2639 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2640 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2641 if (__kmp_tasking_mode != tskm_immediate_exec) {
2642 // When decreasing team size, threads no longer in the team should unref
2643 // task team.
2644 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2645 }
2646 __kmp_free_thread(hot_team->t.t_threads[f]);
2647 hot_team->t.t_threads[f] = NULL;
2648 }
2649 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002650#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002651 if (thread->th.th_hot_teams) {
2652 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2653 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2654 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002655#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002656
Jonathan Peyton30419822017-05-12 18:01:32 +00002657 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002658
Jonathan Peyton30419822017-05-12 18:01:32 +00002659 // Update the t_nproc field in the threads that are still active.
2660 for (f = 0; f < new_nth; f++) {
2661 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2662 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002663 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002664 // Special flag marking that the size change came from omp_set_num_threads()
2665 hot_team->t.t_size_changed = -1;
2666 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002667}
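// Illustrative sketch (user code, not library internals): the hot-team trimming
// above means the worker count drops at the omp_set_num_threads() call itself
// rather than at the next fork. Assuming OMP_NUM_THREADS=8 and no num_threads
// clauses:
//
//   #pragma omp parallel
//   { do_work(); }                // hot team holds 8 threads afterwards
//   omp_set_num_threads(2);       // the 6 extra workers are released right here
//   #pragma omp parallel
//   { do_work(); }                // reuses the already-trimmed hot team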
2668
Jim Cownie5e8470a2013-09-27 10:38:44 +00002669/* Changes max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002670void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2671 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002672
Jonathan Peyton30419822017-05-12 18:01:32 +00002673 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2674 "%d = (%d)\n",
2675 gtid, max_active_levels));
2676 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002677
Jonathan Peyton30419822017-05-12 18:01:32 +00002678 // validate max_active_levels
2679 if (max_active_levels < 0) {
2680 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2681 // We ignore this call if the user has specified a negative value.
2682 // The current setting won't be changed. The last valid setting will be
2683 // used. A warning will be issued (if warnings are allowed as controlled by
2684 // the KMP_WARNINGS env var).
2685 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2686 "max_active_levels for thread %d = (%d)\n",
2687 gtid, max_active_levels));
2688 return;
2689 }
2690 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2691 // it's OK, the max_active_levels is within the valid range: [ 0;
2692 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2693 // We allow a zero value. (implementation defined behavior)
2694 } else {
2695 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2696 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2697 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2698 // Current upper limit is MAX_INT. (implementation defined behavior)
2699 // If the input exceeds the upper limit, we correct the input to be the
2700 // upper limit. (implementation defined behavior)
2701 // In practice this branch is never reached while the upper limit is MAX_INT.
2702 }
2703 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2704 "max_active_levels for thread %d = (%d)\n",
2705 gtid, max_active_levels));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002706
Jonathan Peyton30419822017-05-12 18:01:32 +00002707 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002708
Jonathan Peyton30419822017-05-12 18:01:32 +00002709 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002710
Jonathan Peyton30419822017-05-12 18:01:32 +00002711 set__max_active_levels(thread, max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002712}
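// Illustrative sketch (user code), assuming the standard API entry points route
// to this setter and the getter below:
//
//   omp_set_max_active_levels(-3);          // ignored with a warning, old value kept
//   omp_set_max_active_levels(2);           // accepted and stored in the task ICVs
//   int n = omp_get_max_active_levels();    // n == 2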
2713
2714/* Gets max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002715int __kmp_get_max_active_levels(int gtid) {
2716 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002717
Jonathan Peyton30419822017-05-12 18:01:32 +00002718 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2719 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720
Jonathan Peyton30419822017-05-12 18:01:32 +00002721 thread = __kmp_threads[gtid];
2722 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2723 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2724 "curtask_maxaclevel=%d\n",
2725 gtid, thread->th.th_current_task,
2726 thread->th.th_current_task->td_icvs.max_active_levels));
2727 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002728}
2729
2730/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
Jonathan Peyton30419822017-05-12 18:01:32 +00002731void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2732 kmp_info_t *thread;
2733 // kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002734
Jonathan Peyton30419822017-05-12 18:01:32 +00002735 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2736 gtid, (int)kind, chunk));
2737 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002738
Jonathan Peyton30419822017-05-12 18:01:32 +00002739 // Check if the kind parameter is valid, correct if needed.
2740 // Valid parameters should fit in one of two intervals - standard or extended:
2741 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2742 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2743 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2744 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2745 // TODO: Hint needs attention in case we change the default schedule.
2746 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2747 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2748 __kmp_msg_null);
2749 kind = kmp_sched_default;
2750 chunk = 0; // ignore chunk value in case of bad kind
2751 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002752
Jonathan Peyton30419822017-05-12 18:01:32 +00002753 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002754
Jonathan Peyton30419822017-05-12 18:01:32 +00002755 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002756
Jonathan Peyton30419822017-05-12 18:01:32 +00002757 if (kind < kmp_sched_upper_std) {
2758 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2759 // differentiate static chunked vs. unchunked: an invalid chunk value
2760 // indicates the unchunked schedule (which is the default)
2761 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002762 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002763 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2764 __kmp_sch_map[kind - kmp_sched_lower - 1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002765 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002766 } else {
2767 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2768 // kmp_sched_lower - 2 ];
2769 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2770 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2771 kmp_sched_lower - 2];
2772 }
Andrey Churbanovd454c732017-06-05 17:17:33 +00002773 if (kind == kmp_sched_auto || chunk < 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002774 // ignore parameter chunk for schedule auto
2775 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2776 } else {
2777 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2778 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779}
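// Illustrative sketch (user code): this is the path behind omp_set_schedule().
// The (kind, chunk) pair is stored in the current task's ICVs and consumed by
// loops declared schedule(runtime). Assuming a standard OpenMP program:
//
//   omp_set_schedule(omp_sched_guided, 4);      // kind maps through __kmp_sch_map
//   #pragma omp parallel for schedule(runtime)
//   for (int i = 0; i < 1000; ++i)
//     do_work(i);                               // guided schedule, chunk 4
//
// As handled above, a chunk below 1, or any chunk with the auto kind, falls
// back to KMP_DEFAULT_CHUNK.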
2780
2781/* Gets def_sched_var ICV values */
Jonathan Peyton30419822017-05-12 18:01:32 +00002782void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2783 kmp_info_t *thread;
2784 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002785
Jonathan Peyton30419822017-05-12 18:01:32 +00002786 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2787 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002788
Jonathan Peyton30419822017-05-12 18:01:32 +00002789 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002790
Jonathan Peyton30419822017-05-12 18:01:32 +00002791 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002792
Jonathan Peyton30419822017-05-12 18:01:32 +00002793 switch (th_type) {
2794 case kmp_sch_static:
2795 case kmp_sch_static_greedy:
2796 case kmp_sch_static_balanced:
2797 *kind = kmp_sched_static;
2798 *chunk = 0; // chunk was not set, try to show this fact via zero value
2799 return;
2800 case kmp_sch_static_chunked:
2801 *kind = kmp_sched_static;
2802 break;
2803 case kmp_sch_dynamic_chunked:
2804 *kind = kmp_sched_dynamic;
2805 break;
2806 case kmp_sch_guided_chunked:
2807 case kmp_sch_guided_iterative_chunked:
2808 case kmp_sch_guided_analytical_chunked:
2809 *kind = kmp_sched_guided;
2810 break;
2811 case kmp_sch_auto:
2812 *kind = kmp_sched_auto;
2813 break;
2814 case kmp_sch_trapezoidal:
2815 *kind = kmp_sched_trapezoidal;
2816 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002817#if KMP_STATIC_STEAL_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002818 case kmp_sch_static_steal:
2819 *kind = kmp_sched_static_steal;
2820 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002821#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002822 default:
2823 KMP_FATAL(UnknownSchedulingType, th_type);
2824 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002825
Jonathan Peyton30419822017-05-12 18:01:32 +00002826 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002827}
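// Illustrative note: the reverse mapping above collapses internal variants
// (e.g. kmp_sch_static_greedy / _balanced) onto the public kinds and reports a
// zero chunk when none was set, so a user-level round trip looks like:
//
//   omp_set_schedule(omp_sched_dynamic, 7);
//   omp_sched_t kind; int chunk;
//   omp_get_schedule(&kind, &chunk);    // kind == omp_sched_dynamic, chunk == 7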
2828
Jonathan Peyton30419822017-05-12 18:01:32 +00002829int __kmp_get_ancestor_thread_num(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002830
Jonathan Peyton30419822017-05-12 18:01:32 +00002831 int ii, dd;
2832 kmp_team_t *team;
2833 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002834
Jonathan Peyton30419822017-05-12 18:01:32 +00002835 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2836 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002837
Jonathan Peyton30419822017-05-12 18:01:32 +00002838 // validate level
2839 if (level == 0)
2840 return 0;
2841 if (level < 0)
2842 return -1;
2843 thr = __kmp_threads[gtid];
2844 team = thr->th.th_team;
2845 ii = team->t.t_level;
2846 if (level > ii)
2847 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002848
2849#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002850 if (thr->th.th_teams_microtask) {
2851 // AC: we are in teams region where multiple nested teams have same level
2852 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2853 if (level <=
2854 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2855 KMP_DEBUG_ASSERT(ii >= tlevel);
2856 // AC: As we need to pass by the teams league, we need to artificially
2857 // increase ii
2858 if (ii == tlevel) {
2859 ii += 2; // three teams have same level
2860 } else {
2861 ii++; // two teams have same level
2862 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002863 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002864 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002865#endif
2866
Jonathan Peyton30419822017-05-12 18:01:32 +00002867 if (ii == level)
2868 return __kmp_tid_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002869
Jonathan Peyton30419822017-05-12 18:01:32 +00002870 dd = team->t.t_serialized;
2871 level++;
2872 while (ii > level) {
2873 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002874 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002875 if ((team->t.t_serialized) && (!dd)) {
2876 team = team->t.t_parent;
2877 continue;
2878 }
2879 if (ii > level) {
2880 team = team->t.t_parent;
2881 dd = team->t.t_serialized;
2882 ii--;
2883 }
2884 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002885
Jonathan Peyton30419822017-05-12 18:01:32 +00002886 return (dd > 1) ? (0) : (team->t.t_master_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002887}
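// Illustrative sketch (user code): this routine backs
// omp_get_ancestor_thread_num(level). Per the walk above, level 0 always
// reports 0, the current level reports the caller's own thread number, and
// intermediate levels report the thread number of the caller's ancestor in
// that level's team, skipping serialized layers. Assuming nested parallelism
// is enabled:
//
//   omp_set_nested(1);
//   #pragma omp parallel num_threads(2)          // level 1
//   {
//     #pragma omp parallel num_threads(2)        // level 2
//     {
//       int me    = omp_get_ancestor_thread_num(2); // == omp_get_thread_num()
//       int outer = omp_get_ancestor_thread_num(1); // enclosing team's thread id
//       int init  = omp_get_ancestor_thread_num(0); // always 0
//     }
//   }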
2888
Jonathan Peyton30419822017-05-12 18:01:32 +00002889int __kmp_get_team_size(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002890
Jonathan Peyton30419822017-05-12 18:01:32 +00002891 int ii, dd;
2892 kmp_team_t *team;
2893 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002894
Jonathan Peyton30419822017-05-12 18:01:32 +00002895 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
2896 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002897
Jonathan Peyton30419822017-05-12 18:01:32 +00002898 // validate level
2899 if (level == 0)
2900 return 1;
2901 if (level < 0)
2902 return -1;
2903 thr = __kmp_threads[gtid];
2904 team = thr->th.th_team;
2905 ii = team->t.t_level;
2906 if (level > ii)
2907 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002908
2909#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002910 if (thr->th.th_teams_microtask) {
2911 // AC: we are in teams region where multiple nested teams have same level
2912 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2913 if (level <=
2914 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2915 KMP_DEBUG_ASSERT(ii >= tlevel);
2916 // AC: As we need to pass by the teams league, we need to artificially
2917 // increase ii
2918 if (ii == tlevel) {
2919 ii += 2; // three teams have same level
2920 } else {
2921 ii++; // two teams have same level
2922 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002923 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002924 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002925#endif
2926
Jonathan Peyton30419822017-05-12 18:01:32 +00002927 while (ii > level) {
2928 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002929 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002930 if (team->t.t_serialized && (!dd)) {
2931 team = team->t.t_parent;
2932 continue;
2933 }
2934 if (ii > level) {
2935 team = team->t.t_parent;
2936 ii--;
2937 }
2938 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002939
Jonathan Peyton30419822017-05-12 18:01:32 +00002940 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002941}
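// Illustrative note: the companion routine above backs omp_get_team_size(level);
// level 0 reports 1 (the initial thread), an out-of-range level reports -1, and
// other levels report t_nproc of the ancestor team after skipping serialized
// layers. Inside the nested region sketched earlier (user code):
//
//   int sz2 = omp_get_team_size(2);   // 2, the innermost team
//   int sz1 = omp_get_team_size(1);   // 2, the enclosing team
//   int sz0 = omp_get_team_size(0);   // always 1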
2942
Jonathan Peyton30419822017-05-12 18:01:32 +00002943kmp_r_sched_t __kmp_get_schedule_global() {
2944 // This routine exists because the pairs (__kmp_sched, __kmp_chunk) and
2945 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
2946 // independently, so the up-to-date schedule can be obtained here.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002947
Jonathan Peyton30419822017-05-12 18:01:32 +00002948 kmp_r_sched_t r_sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002949
Jonathan Peyton30419822017-05-12 18:01:32 +00002950 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
2951 // __kmp_guided. __kmp_sched should keep original value, so that user can set
2952 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
2953 // different roots (even in OMP 2.5)
2954 if (__kmp_sched == kmp_sch_static) {
2955 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed
2956 // schedule (balanced or greedy)
2957 } else if (__kmp_sched == kmp_sch_guided_chunked) {
2958 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed
2959 // schedule (iterative or analytical)
2960 } else {
2961 r_sched.r_sched_type =
2962 __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2963 }
2964
2965 if (__kmp_chunk < KMP_DEFAULT_CHUNK) { // __kmp_chunk may be wrong here (if it
2966 // was not ever set)
2967 r_sched.chunk = KMP_DEFAULT_CHUNK;
2968 } else {
2969 r_sched.chunk = __kmp_chunk;
2970 }
2971
2972 return r_sched;
2973}
2974
2975/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2976   at least argc *t_argv entries for the requested team. */
2977static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
2978
2979 KMP_DEBUG_ASSERT(team);
2980 if (!realloc || argc > team->t.t_max_argc) {
2981
2982 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
2983 "current entries=%d\n",
2984 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
2985 /* if previously allocated heap space for args, free them */
2986 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
2987 __kmp_free((void *)team->t.t_argv);
2988
2989 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
2990 /* use unused space in the cache line for arguments */
2991 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
2992 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
2993 "argv entries\n",
2994 team->t.t_id, team->t.t_max_argc));
2995 team->t.t_argv = &team->t.t_inline_argv[0];
2996 if (__kmp_storage_map) {
2997 __kmp_print_storage_map_gtid(
2998 -1, &team->t.t_inline_argv[0],
2999 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3000 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3001 team->t.t_id);
3002 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003003 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00003004 /* allocate space for arguments in the heap */
3005 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3006 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3007 : 2 * argc;
3008 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3009 "argv entries\n",
3010 team->t.t_id, team->t.t_max_argc));
3011 team->t.t_argv =
3012 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3013 if (__kmp_storage_map) {
3014 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3015 &team->t.t_argv[team->t.t_max_argc],
3016 sizeof(void *) * team->t.t_max_argc,
3017 "team_%d.t_argv", team->t.t_id);
3018 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003019 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003020 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003021}
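// Illustrative note on the sizing policy above: small argument lists reuse the
// cache-line space reserved inline in the team structure (KMP_INLINE_ARGV_ENTRIES
// slots); larger lists go to the heap, with KMP_MIN_MALLOC_ARGV_ENTRIES used
// whenever argc is at most half of it and 2 * argc otherwise, so regions whose
// argument counts grow slowly do not reallocate on every fork. For example,
// assuming a hypothetical KMP_MIN_MALLOC_ARGV_ENTRIES of 100, argc == 30
// (already past the inline capacity) would allocate 100 heap entries while
// argc == 80 would allocate 160.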
3022
Jonathan Peyton30419822017-05-12 18:01:32 +00003023static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3024 int i;
3025 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3026 team->t.t_threads =
3027 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3028 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3029 sizeof(dispatch_shared_info_t) * num_disp_buff);
3030 team->t.t_dispatch =
3031 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3032 team->t.t_implicit_task_taskdata =
3033 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3034 team->t.t_max_nproc = max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003035
Jonathan Peyton30419822017-05-12 18:01:32 +00003036 /* setup dispatch buffers */
3037 for (i = 0; i < num_disp_buff; ++i) {
3038 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003039#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003040 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003041#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003042 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003043}
3044
Jonathan Peyton30419822017-05-12 18:01:32 +00003045static void __kmp_free_team_arrays(kmp_team_t *team) {
3046 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3047 int i;
3048 for (i = 0; i < team->t.t_max_nproc; ++i) {
3049 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3050 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3051 team->t.t_dispatch[i].th_disp_buffer = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003052 }
3053 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003054 __kmp_free(team->t.t_threads);
3055 __kmp_free(team->t.t_disp_buffer);
3056 __kmp_free(team->t.t_dispatch);
3057 __kmp_free(team->t.t_implicit_task_taskdata);
3058 team->t.t_threads = NULL;
3059 team->t.t_disp_buffer = NULL;
3060 team->t.t_dispatch = NULL;
3061 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003062}
3063
Jonathan Peyton30419822017-05-12 18:01:32 +00003064static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3065 kmp_info_t **oldThreads = team->t.t_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003066
Jonathan Peyton30419822017-05-12 18:01:32 +00003067 __kmp_free(team->t.t_disp_buffer);
3068 __kmp_free(team->t.t_dispatch);
3069 __kmp_free(team->t.t_implicit_task_taskdata);
3070 __kmp_allocate_team_arrays(team, max_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003071
Jonathan Peyton30419822017-05-12 18:01:32 +00003072 KMP_MEMCPY(team->t.t_threads, oldThreads,
3073 team->t.t_nproc * sizeof(kmp_info_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003074
Jonathan Peyton30419822017-05-12 18:01:32 +00003075 __kmp_free(oldThreads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003076}
3077
Jonathan Peyton30419822017-05-12 18:01:32 +00003078static kmp_internal_control_t __kmp_get_global_icvs(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003079
Jonathan Peyton30419822017-05-12 18:01:32 +00003080 kmp_r_sched_t r_sched =
3081 __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003082
3083#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003084 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003085#endif /* OMP_40_ENABLED */
3086
Jonathan Peyton30419822017-05-12 18:01:32 +00003087 kmp_internal_control_t g_icvs = {
3088 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3089 (kmp_int8)__kmp_dflt_nested, // int nested; //internal control
3090 // for nested parallelism (per thread)
3091 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3092 // adjustment of threads (per thread)
3093 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3094 // whether blocktime is explicitly set
3095 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003096#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00003097 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3098// intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003099#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003100 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3101 // next parallel region (per thread)
3102 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3103 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3104 // for max_active_levels
3105 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3106// {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003107#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003108 __kmp_nested_proc_bind.bind_types[0],
3109 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003110#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00003111 NULL // struct kmp_internal_control *next;
3112 };
Jim Cownie5e8470a2013-09-27 10:38:44 +00003113
Jonathan Peyton30419822017-05-12 18:01:32 +00003114 return g_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003115}
3116
Jonathan Peyton30419822017-05-12 18:01:32 +00003117static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003118
Jonathan Peyton30419822017-05-12 18:01:32 +00003119 kmp_internal_control_t gx_icvs;
3120 gx_icvs.serial_nesting_level =
3121 0; // probably = team->t.t_serialized, as in __kmp_save_internal_controls
3122 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3123 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003124
Jonathan Peyton30419822017-05-12 18:01:32 +00003125 return gx_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003126}
3127
Jonathan Peyton30419822017-05-12 18:01:32 +00003128static void __kmp_initialize_root(kmp_root_t *root) {
3129 int f;
3130 kmp_team_t *root_team;
3131 kmp_team_t *hot_team;
3132 int hot_team_max_nth;
3133 kmp_r_sched_t r_sched =
3134 __kmp_get_schedule_global(); // get current state of scheduling globals
3135 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3136 KMP_DEBUG_ASSERT(root);
3137 KMP_ASSERT(!root->r.r_begin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003138
Jonathan Peyton30419822017-05-12 18:01:32 +00003139 /* setup the root state structure */
3140 __kmp_init_lock(&root->r.r_begin_lock);
3141 root->r.r_begin = FALSE;
3142 root->r.r_active = FALSE;
3143 root->r.r_in_parallel = 0;
3144 root->r.r_blocktime = __kmp_dflt_blocktime;
3145 root->r.r_nested = __kmp_dflt_nested;
Jonathan Peytonf4392462017-07-27 20:58:41 +00003146 root->r.r_cg_nthreads = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003147
Jonathan Peyton30419822017-05-12 18:01:32 +00003148 /* setup the root team for this task */
3149 /* allocate the root team structure */
3150 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003151
Jonathan Peyton30419822017-05-12 18:01:32 +00003152 root_team =
3153 __kmp_allocate_team(root,
3154 1, // new_nproc
3155 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003156#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003157 0, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003158#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003159#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003160 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003161#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003162 &r_icvs,
3163 0 // argc
3164 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3165 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003166#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00003167 // Non-NULL value should be assigned to make the debugger display the root
3168 // team.
3169 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003170#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003171
Jonathan Peyton30419822017-05-12 18:01:32 +00003172 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003173
Jonathan Peyton30419822017-05-12 18:01:32 +00003174 root->r.r_root_team = root_team;
3175 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003176
Jonathan Peyton30419822017-05-12 18:01:32 +00003177 /* initialize root team */
3178 root_team->t.t_threads[0] = NULL;
3179 root_team->t.t_nproc = 1;
3180 root_team->t.t_serialized = 1;
3181 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3182 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3183 root_team->t.t_sched.chunk = r_sched.chunk;
3184 KA_TRACE(
3185 20,
3186 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3187 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003188
Jonathan Peyton30419822017-05-12 18:01:32 +00003189 /* setup the hot team for this task */
3190 /* allocate the hot team structure */
3191 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003192
Jonathan Peyton30419822017-05-12 18:01:32 +00003193 hot_team =
3194 __kmp_allocate_team(root,
3195 1, // new_nproc
3196 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003197#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003198 0, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003199#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003200#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003201 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003202#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003203 &r_icvs,
3204 0 // argc
3205 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3206 );
3207 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003208
Jonathan Peyton30419822017-05-12 18:01:32 +00003209 root->r.r_hot_team = hot_team;
3210 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003211
Jonathan Peyton30419822017-05-12 18:01:32 +00003212 /* first-time initialization */
3213 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003214
Jonathan Peyton30419822017-05-12 18:01:32 +00003215 /* initialize hot team */
3216 hot_team_max_nth = hot_team->t.t_max_nproc;
3217 for (f = 0; f < hot_team_max_nth; ++f) {
3218 hot_team->t.t_threads[f] = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003219 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003220 hot_team->t.t_nproc = 1;
3221 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3222 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3223 hot_team->t.t_sched.chunk = r_sched.chunk;
3224 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003225}
3226
3227#ifdef KMP_DEBUG
3228
Jim Cownie5e8470a2013-09-27 10:38:44 +00003229typedef struct kmp_team_list_item {
Jonathan Peyton30419822017-05-12 18:01:32 +00003230 kmp_team_p const *entry;
3231 struct kmp_team_list_item *next;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003232} kmp_team_list_item_t;
Jonathan Peyton30419822017-05-12 18:01:32 +00003233typedef kmp_team_list_item_t *kmp_team_list_t;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003234
Jonathan Peyton30419822017-05-12 18:01:32 +00003235static void __kmp_print_structure_team_accum( // Add team to list of teams.
3236 kmp_team_list_t list, // List of teams.
3237 kmp_team_p const *team // Team to add.
3238 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003239
Jonathan Peyton30419822017-05-12 18:01:32 +00003240 // List must terminate with item where both entry and next are NULL.
3241 // Team is added to the list only once.
3242 // List is sorted in ascending order by team id.
3243 // Team id is *not* a key.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003244
Jonathan Peyton30419822017-05-12 18:01:32 +00003245 kmp_team_list_t l;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003246
Jonathan Peyton30419822017-05-12 18:01:32 +00003247 KMP_DEBUG_ASSERT(list != NULL);
3248 if (team == NULL) {
3249 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003250 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003251
Jonathan Peyton30419822017-05-12 18:01:32 +00003252 __kmp_print_structure_team_accum(list, team->t.t_parent);
3253 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003254
Jonathan Peyton30419822017-05-12 18:01:32 +00003255 // Search list for the team.
3256 l = list;
3257 while (l->next != NULL && l->entry != team) {
3258 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003259 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003260 if (l->next != NULL) {
3261 return; // Team has been added before, exit.
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003262 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003263
Jonathan Peyton30419822017-05-12 18:01:32 +00003264 // Team is not found. Search list again for insertion point.
3265 l = list;
3266 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3267 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003268 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003269
Jonathan Peyton30419822017-05-12 18:01:32 +00003270 // Insert team.
3271 {
3272 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3273 sizeof(kmp_team_list_item_t));
3274 *item = *l;
3275 l->entry = team;
3276 l->next = item;
3277 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003278}
3279
Jonathan Peyton30419822017-05-12 18:01:32 +00003280static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
Jim Cownie5e8470a2013-09-27 10:38:44 +00003281
Jonathan Peyton30419822017-05-12 18:01:32 +00003282 ) {
3283 __kmp_printf("%s", title);
3284 if (team != NULL) {
3285 __kmp_printf("%2x %p\n", team->t.t_id, team);
3286 } else {
3287 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003288 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003289}
3290
Jonathan Peyton30419822017-05-12 18:01:32 +00003291static void __kmp_print_structure_thread(char const *title,
3292 kmp_info_p const *thread) {
3293 __kmp_printf("%s", title);
3294 if (thread != NULL) {
3295 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3296 } else {
3297 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003298 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003299}
3300
Jonathan Peyton30419822017-05-12 18:01:32 +00003301void __kmp_print_structure(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003302
Jonathan Peyton30419822017-05-12 18:01:32 +00003303 kmp_team_list_t list;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003304
Jonathan Peyton30419822017-05-12 18:01:32 +00003305 // Initialize list of teams.
3306 list =
3307 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3308 list->entry = NULL;
3309 list->next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003310
Jonathan Peyton30419822017-05-12 18:01:32 +00003311 __kmp_printf("\n------------------------------\nGlobal Thread "
3312 "Table\n------------------------------\n");
3313 {
3314 int gtid;
3315 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3316 __kmp_printf("%2d", gtid);
3317 if (__kmp_threads != NULL) {
3318 __kmp_printf(" %p", __kmp_threads[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003319 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003320 if (__kmp_root != NULL) {
3321 __kmp_printf(" %p", __kmp_root[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003322 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003323 __kmp_printf("\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003324 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003325 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003326
Jonathan Peyton30419822017-05-12 18:01:32 +00003327 // Print out __kmp_threads array.
3328 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3329 "----------\n");
3330 if (__kmp_threads != NULL) {
3331 int gtid;
3332 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3333 kmp_info_t const *thread = __kmp_threads[gtid];
3334 if (thread != NULL) {
3335 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3336 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3337 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3338 __kmp_print_structure_team(" Serial Team: ",
3339 thread->th.th_serial_team);
3340 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3341 __kmp_print_structure_thread(" Master: ",
3342 thread->th.th_team_master);
3343 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3344 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003345#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003346 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003347#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003348 __kmp_print_structure_thread(" Next in pool: ",
3349 thread->th.th_next_pool);
3350 __kmp_printf("\n");
3351 __kmp_print_structure_team_accum(list, thread->th.th_team);
3352 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003353 }
3354 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003355 } else {
3356 __kmp_printf("Threads array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003357 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003358
Jonathan Peyton30419822017-05-12 18:01:32 +00003359 // Print out __kmp_root array.
3360 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3361 "--------\n");
3362 if (__kmp_root != NULL) {
3363 int gtid;
3364 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3365 kmp_root_t const *root = __kmp_root[gtid];
3366 if (root != NULL) {
3367 __kmp_printf("GTID %2d %p:\n", gtid, root);
3368 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3369 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3370 __kmp_print_structure_thread(" Uber Thread: ",
3371 root->r.r_uber_thread);
3372 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3373 __kmp_printf(" Nested?: %2d\n", root->r.r_nested);
3374 __kmp_printf(" In Parallel: %2d\n", root->r.r_in_parallel);
3375 __kmp_printf("\n");
3376 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3377 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003378 }
3379 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003380 } else {
3381 __kmp_printf("Ubers array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003382 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003383
Jonathan Peyton30419822017-05-12 18:01:32 +00003384 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3385 "--------\n");
3386 while (list->next != NULL) {
3387 kmp_team_p const *team = list->entry;
3388 int i;
3389 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3390 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3391 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
3392 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3393 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3394 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3395 for (i = 0; i < team->t.t_nproc; ++i) {
3396 __kmp_printf(" Thread %2d: ", i);
3397 __kmp_print_structure_thread("", team->t.t_threads[i]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003398 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003399 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3400 __kmp_printf("\n");
3401 list = list->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003402 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003403
Jonathan Peyton30419822017-05-12 18:01:32 +00003404 // Print out __kmp_thread_pool and __kmp_team_pool.
3405 __kmp_printf("\n------------------------------\nPools\n----------------------"
3406 "--------\n");
3407 __kmp_print_structure_thread("Thread pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003408 CCAST(kmp_info_t *, __kmp_thread_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003409 __kmp_print_structure_team("Team pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003410 CCAST(kmp_team_t *, __kmp_team_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003411 __kmp_printf("\n");
Jim Cownie5e8470a2013-09-27 10:38:44 +00003412
Jonathan Peyton30419822017-05-12 18:01:32 +00003413 // Free team list.
3414 while (list != NULL) {
3415 kmp_team_list_item_t *item = list;
3416 list = list->next;
3417 KMP_INTERNAL_FREE(item);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003418 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003419}
3420
3421#endif
3422
Jim Cownie5e8470a2013-09-27 10:38:44 +00003423//---------------------------------------------------------------------------
3424// Stuff for per-thread fast random number generator
3425// Table of primes
Jim Cownie5e8470a2013-09-27 10:38:44 +00003426static const unsigned __kmp_primes[] = {
Jonathan Peyton30419822017-05-12 18:01:32 +00003427 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3428 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3429 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3430 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3431 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3432 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3433 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3434 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3435 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3436 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3437 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
Jim Cownie5e8470a2013-09-27 10:38:44 +00003438
3439//---------------------------------------------------------------------------
3440// __kmp_get_random: Get a random number using a linear congruential method.
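// The per-thread state advances as x_{n+1} = a * x_n + 1 (wrapping mod 2^32 on
// the usual 32-bit unsigned), where the multiplier 'a' is one of the odd
// constants from __kmp_primes chosen in __kmp_init_random below; only the top
// 16 bits of x are returned. Illustrative use (a sketch, not a description of
// every call site):
//   unsigned short r = __kmp_get_random(this_thr); // value in [0, 65535]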
Jonathan Peyton30419822017-05-12 18:01:32 +00003441unsigned short __kmp_get_random(kmp_info_t *thread) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003442 unsigned x = thread->th.th_x;
Jonathan Peyton30419822017-05-12 18:01:32 +00003443 unsigned short r = x >> 16;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003444
Jonathan Peyton30419822017-05-12 18:01:32 +00003445 thread->th.th_x = x * thread->th.th_a + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003446
3447 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00003448 thread->th.th_info.ds.ds_tid, r));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003449
3450 return r;
3451}
3452//--------------------------------------------------------
3453// __kmp_init_random: Initialize a random number generator
Jonathan Peyton30419822017-05-12 18:01:32 +00003454void __kmp_init_random(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003455 unsigned seed = thread->th.th_info.ds.ds_tid;
3456
Jonathan Peyton30419822017-05-12 18:01:32 +00003457 thread->th.th_a =
3458 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3459 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3460 KA_TRACE(30,
3461 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003462}
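// Illustrative seeding (a sketch using the table above): a thread with
// ds_tid == 0 gets th_a = __kmp_primes[0] = 0x9e3779b1 and
// th_x = (0 + 1) * 0x9e3779b1 + 1, so threads with different tids start on
// different streams of the generator.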
3463
Jim Cownie5e8470a2013-09-27 10:38:44 +00003464#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00003465/* reclaim array entries for root threads that are already dead, returns number
3466 * reclaimed */
3467static int __kmp_reclaim_dead_roots(void) {
3468 int i, r = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003469
Jonathan Peyton30419822017-05-12 18:01:32 +00003470 for (i = 0; i < __kmp_threads_capacity; ++i) {
3471 if (KMP_UBER_GTID(i) &&
3472 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3473 !__kmp_root[i]
3474 ->r.r_active) { // AC: reclaim only roots died in non-active state
3475 r += __kmp_unregister_root_other_thread(i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003476 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003477 }
3478 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003479}
3480#endif
3481
Jonathan Peyton30419822017-05-12 18:01:32 +00003482/* This function attempts to create free entries in __kmp_threads and
3483 __kmp_root, and returns the number of free entries generated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003484
Jonathan Peyton30419822017-05-12 18:01:32 +00003485 For Windows* OS static library, the first mechanism used is to reclaim array
3486 entries for root threads that are already dead.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003487
Jonathan Peyton30419822017-05-12 18:01:32 +00003488 On all platforms, expansion is attempted on the arrays __kmp_threads and
3489 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3490 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3491 threadprivate cache array has been created. Synchronization with
3492 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003493
Jonathan Peyton30419822017-05-12 18:01:32 +00003494 After any dead root reclamation, if the clipping value allows array expansion
3495 to result in the generation of a total of nWish free slots, the function does
3496 that expansion. If not, but the clipping value allows array expansion to
3497 result in the generation of a total of nNeed free slots, the function does
3498 that expansion. Otherwise, nothing is done beyond the possible initial root
3499 thread reclamation. However, if nNeed is zero, a best-effort attempt is made
3500 to fulfil nWish as far as possible, i.e. the function will attempt to create
Jim Cownie5e8470a2013-09-27 10:38:44 +00003501 as many free slots as possible up to nWish.
3502
Jonathan Peyton30419822017-05-12 18:01:32 +00003503 If any argument is negative, the behavior is undefined. */
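// Illustrative sizing (a sketch, values chosen for exposition only): with
// __kmp_threads_capacity == 4, __kmp_actual_max_nth == 32 and nWish == 5, the
// doubling loop below grows the capacity 4 -> 8 -> 16 (the first doubled value
// >= 4 + 5); __kmp_threads and __kmp_root are then reallocated together in one
// block and the old entries copied across before the new pointers are
// published.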
3504static int __kmp_expand_threads(int nWish, int nNeed) {
3505 int added = 0;
3506 int old_tp_cached;
3507 int __kmp_actual_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003508
Jonathan Peyton30419822017-05-12 18:01:32 +00003509 if (nNeed > nWish) /* normalize the arguments */
3510 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003511#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00003512 /* only for Windows static library */
3513 /* reclaim array entries for root threads that are already dead */
3514 added = __kmp_reclaim_dead_roots();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003515
Jonathan Peyton30419822017-05-12 18:01:32 +00003516 if (nNeed) {
3517 nNeed -= added;
3518 if (nNeed < 0)
3519 nNeed = 0;
3520 }
3521 if (nWish) {
3522 nWish -= added;
3523 if (nWish < 0)
3524 nWish = 0;
3525 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003526#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003527 if (nWish <= 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003528 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003529
3530 while (1) {
3531 int nTarget;
3532 int minimumRequiredCapacity;
3533 int newCapacity;
3534 kmp_info_t **newThreads;
3535 kmp_root_t **newRoot;
3536
3537 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3538 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
Jonathan Peytonf4392462017-07-27 20:58:41 +00003539 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
Jonathan Peyton30419822017-05-12 18:01:32 +00003540 // > __kmp_max_nth in one of two ways:
3541 //
3542 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3543 // may not be reused by another thread, so we may need to increase
Jonathan Peyton09244f32017-07-26 20:07:58 +00003544 // __kmp_threads_capacity to __kmp_max_nth + 1.
Jonathan Peyton30419822017-05-12 18:01:32 +00003545 //
3546 // 2) New foreign root(s) are encountered. We always register new foreign
3547 // roots. This may cause a smaller # of threads to be allocated at
3548 // subsequent parallel regions, but the worker threads hang around (and
3549 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3550 //
3551 // Anyway, that is the reason for moving the check to see if
Jonathan Peyton09244f32017-07-26 20:07:58 +00003552 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
Jonathan Peyton30419822017-05-12 18:01:32 +00003553 // instead of having it performed here. -BB
3554 old_tp_cached = __kmp_tp_cached;
3555 __kmp_actual_max_nth =
3556 old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3557 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3558
3559 /* compute expansion headroom to check if we can expand and whether to aim
3560 for nWish or nNeed */
3561 nTarget = nWish;
3562 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3563 /* can't fulfil nWish, so try nNeed */
3564 if (nNeed) {
3565 nTarget = nNeed;
3566 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3567 /* possible expansion too small -- give up */
3568 break;
3569 }
3570 } else {
3571 /* best-effort */
3572 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3573 if (!nTarget) {
3574 /* can't expand at all -- give up */
3575 break;
3576 }
3577 }
3578 }
3579 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3580
3581 newCapacity = __kmp_threads_capacity;
3582 do {
3583 newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1)
3584 ? (newCapacity << 1)
3585 : __kmp_actual_max_nth;
3586 } while (newCapacity < minimumRequiredCapacity);
3587 newThreads = (kmp_info_t **)__kmp_allocate(
3588 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity +
3589 CACHE_LINE);
3590 newRoot = (kmp_root_t **)((char *)newThreads +
3591 sizeof(kmp_info_t *) * newCapacity);
3592 KMP_MEMCPY(newThreads, __kmp_threads,
3593 __kmp_threads_capacity * sizeof(kmp_info_t *));
3594 KMP_MEMCPY(newRoot, __kmp_root,
3595 __kmp_threads_capacity * sizeof(kmp_root_t *));
3596 memset(newThreads + __kmp_threads_capacity, 0,
3597 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t *));
3598 memset(newRoot + __kmp_threads_capacity, 0,
3599 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t *));
3600
3601 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3602 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has
3603 allocated a threadprivate cache while we were allocating the expanded
3604 array, and our new capacity is larger than the threadprivate cache
3605 capacity, so we should deallocate the expanded arrays and try again.
3606 This is the first check of a double-check pair. */
3607 __kmp_free(newThreads);
3608 continue; /* start over and try again */
3609 }
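    // Second check of the double-check pair: the same condition is re-tested
    // below under __kmp_tp_cached_lock, so the larger arrays are only published
    // when no threadprivate cache with a smaller capacity has appeared in the
    // meantime.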
3610 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3611 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3612 /* Same check as above, but this time with the lock so we can be sure if
3613 we can succeed. */
3614 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3615 __kmp_free(newThreads);
3616 continue; /* start over and try again */
3617 } else {
3618 /* success */
3619 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be
3620 // investigated.
3621 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3622 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3623 added += newCapacity - __kmp_threads_capacity;
3624 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3625 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3626 break; /* succeeded, so we can exit the loop */
3627 }
3628 }
3629 return added;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003630}
3631
Jonathan Peyton30419822017-05-12 18:01:32 +00003632/* Register the current thread as a root thread and obtain our gtid. We must
3633 have the __kmp_initz_lock held at this point. Argument TRUE only if we are the
3634 thread that calls from __kmp_do_serial_initialize() */
3635int __kmp_register_root(int initial_thread) {
3636 kmp_info_t *root_thread;
3637 kmp_root_t *root;
3638 int gtid;
3639 int capacity;
3640 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3641 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3642 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003643
Jonathan Peyton30419822017-05-12 18:01:32 +00003644 /* 2007-03-02:
3645 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3646 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3647 work as expected -- it may return false (that means there is at least one
3648 empty slot in __kmp_threads array), but it is possible the only free slot
3649 is #0, which is reserved for initial thread and so cannot be used for this
3650 one. The following code works around this bug.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003651
Jonathan Peyton30419822017-05-12 18:01:32 +00003652 However, the right solution seems to be not to reserve slot #0 for the initial
3653 thread because:
3654 (1) there is no magic in slot #0,
3655 (2) we cannot detect initial thread reliably (the first thread which does
3656 serial initialization may be not a real initial thread).
3657 */
3658 capacity = __kmp_threads_capacity;
3659 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3660 --capacity;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003661 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003662
Jonathan Peyton30419822017-05-12 18:01:32 +00003663 /* see if there are too many threads */
3664 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1, 1)) {
3665 if (__kmp_tp_cached) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003666 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3667 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3668 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00003669 } else {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003670 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3671 __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003672 }
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003673 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003674
3675 /* find an available thread slot */
3676 /* Don't reassign the zero slot since we need that to only be used by initial
3677 thread */
3678 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3679 gtid++)
3680 ;
3681 KA_TRACE(1,
3682 ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3683 KMP_ASSERT(gtid < __kmp_threads_capacity);
3684
3685 /* update global accounting */
3686 __kmp_all_nth++;
3687 TCW_4(__kmp_nth, __kmp_nth + 1);
3688
3689 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3690 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3691 if (__kmp_adjust_gtid_mode) {
3692 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3693 if (TCR_4(__kmp_gtid_mode) != 2) {
3694 TCW_4(__kmp_gtid_mode, 2);
3695 }
3696 } else {
3697 if (TCR_4(__kmp_gtid_mode) != 1) {
3698 TCW_4(__kmp_gtid_mode, 1);
3699 }
3700 }
3701 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003702
3703#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00003704 /* Adjust blocktime to zero if necessary */
3705 /* Middle initialization might not have occurred yet */
3706 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3707 if (__kmp_nth > __kmp_avail_proc) {
3708 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003709 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003710 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711#endif /* KMP_ADJUST_BLOCKTIME */
3712
Jonathan Peyton30419822017-05-12 18:01:32 +00003713 /* setup this new hierarchy */
3714 if (!(root = __kmp_root[gtid])) {
3715 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3716 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3717 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003718
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003719#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003720 // Initialize stats as soon as possible (right after gtid assignment).
3721 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3722 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3723 KMP_SET_THREAD_STATE(SERIAL_REGION);
3724 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003725#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003726 __kmp_initialize_root(root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003727
Jonathan Peyton30419822017-05-12 18:01:32 +00003728 /* setup new root thread structure */
3729 if (root->r.r_uber_thread) {
3730 root_thread = root->r.r_uber_thread;
3731 } else {
3732 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3733 if (__kmp_storage_map) {
3734 __kmp_print_thread_storage_map(root_thread, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003735 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003736 root_thread->th.th_info.ds.ds_gtid = gtid;
3737 root_thread->th.th_root = root;
3738 if (__kmp_env_consistency_check) {
3739 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3740 }
3741#if USE_FAST_MEMORY
3742 __kmp_initialize_fast_memory(root_thread);
3743#endif /* USE_FAST_MEMORY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003744
Jonathan Peyton30419822017-05-12 18:01:32 +00003745#if KMP_USE_BGET
3746 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3747 __kmp_initialize_bget(root_thread);
3748#endif
3749 __kmp_init_random(root_thread); // Initialize random number generator
3750 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003751
Jonathan Peyton30419822017-05-12 18:01:32 +00003752 /* setup the serial team held in reserve by the root thread */
3753 if (!root_thread->th.th_serial_team) {
3754 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3755 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3756 root_thread->th.th_serial_team =
3757 __kmp_allocate_team(root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003758#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003759 0, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003760#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003761#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003762 proc_bind_default,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003763#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003764 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3765 }
3766 KMP_ASSERT(root_thread->th.th_serial_team);
3767 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3768 root_thread->th.th_serial_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003769
Jonathan Peyton30419822017-05-12 18:01:32 +00003770 /* drop root_thread into place */
3771 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003772
Jonathan Peyton30419822017-05-12 18:01:32 +00003773 root->r.r_root_team->t.t_threads[0] = root_thread;
3774 root->r.r_hot_team->t.t_threads[0] = root_thread;
3775 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3776 // AC: the team created in reserve, not for execution (it is unused for now).
3777 root_thread->th.th_serial_team->t.t_serialized = 0;
3778 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003779
Jonathan Peyton30419822017-05-12 18:01:32 +00003780 /* initialize the thread, get it ready to go */
3781 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3782 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003783
Jonathan Peyton30419822017-05-12 18:01:32 +00003784 /* prepare the master thread for get_gtid() */
3785 __kmp_gtid_set_specific(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003786
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003787#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003788 __kmp_itt_thread_name(gtid);
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003789#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003790
Jonathan Peyton30419822017-05-12 18:01:32 +00003791#ifdef KMP_TDATA_GTID
3792 __kmp_gtid = gtid;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003793#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003794 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3795 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3796
3797 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3798 "plain=%u\n",
3799 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3800 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3801 KMP_INIT_BARRIER_STATE));
3802 { // Initialize barrier data.
3803 int b;
3804 for (b = 0; b < bs_last_barrier; ++b) {
3805 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3806#if USE_DEBUGGER
3807 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3808#endif
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003809 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003810 }
3811 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3812 KMP_INIT_BARRIER_STATE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003813
Alp Toker763b9392014-02-28 09:42:41 +00003814#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00003815#if OMP_40_ENABLED
3816 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3817 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3818 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3819 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3820#endif
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003821
Jonathan Peyton30419822017-05-12 18:01:32 +00003822 if (TCR_4(__kmp_init_middle)) {
3823 __kmp_affinity_set_init_mask(gtid, TRUE);
3824 }
Alp Toker763b9392014-02-28 09:42:41 +00003825#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003826
Jonathan Peyton30419822017-05-12 18:01:32 +00003827 __kmp_root_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003828
Jonathan Peyton30419822017-05-12 18:01:32 +00003829 KMP_MB();
3830 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003831
Jonathan Peyton30419822017-05-12 18:01:32 +00003832 return gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003833}
3834
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003835#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003836static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3837 const int max_level) {
3838 int i, n, nth;
3839 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3840 if (!hot_teams || !hot_teams[level].hot_team) {
3841 return 0;
3842 }
3843 KMP_DEBUG_ASSERT(level < max_level);
3844 kmp_team_t *team = hot_teams[level].hot_team;
3845 nth = hot_teams[level].hot_team_nth;
3846 n = nth - 1; // master is not freed
3847 if (level < max_level - 1) {
3848 for (i = 0; i < nth; ++i) {
3849 kmp_info_t *th = team->t.t_threads[i];
3850 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3851 if (i > 0 && th->th.th_hot_teams) {
3852 __kmp_free(th->th.th_hot_teams);
3853 th->th.th_hot_teams = NULL;
3854 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003855 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003856 }
3857 __kmp_free_team(root, team, NULL);
3858 return n;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003859}
3860#endif
3861
Jonathan Peyton30419822017-05-12 18:01:32 +00003862// Resets a root thread and clear its root and hot teams.
3863// Returns the number of __kmp_threads entries directly and indirectly freed.
3864static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3865 kmp_team_t *root_team = root->r.r_root_team;
3866 kmp_team_t *hot_team = root->r.r_hot_team;
3867 int n = hot_team->t.t_nproc;
3868 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003869
Jonathan Peyton30419822017-05-12 18:01:32 +00003870 KMP_DEBUG_ASSERT(!root->r.r_active);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003871
Jonathan Peyton30419822017-05-12 18:01:32 +00003872 root->r.r_root_team = NULL;
3873 root->r.r_hot_team = NULL;
3874 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
3875 // before call to __kmp_free_team().
3876 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003877#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003878 if (__kmp_hot_teams_max_level >
3879 0) { // need to free nested hot teams and their threads if any
3880 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3881 kmp_info_t *th = hot_team->t.t_threads[i];
3882 if (__kmp_hot_teams_max_level > 1) {
3883 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3884 }
3885 if (th->th.th_hot_teams) {
3886 __kmp_free(th->th.th_hot_teams);
3887 th->th.th_hot_teams = NULL;
3888 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003889 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003890 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003891#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003892 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003893
Jonathan Peyton30419822017-05-12 18:01:32 +00003894 // Before we can reap the thread, we need to make certain that all other
3895 // threads in the teams that had this root as ancestor have stopped trying to
3896 // steal tasks.
3897 if (__kmp_tasking_mode != tskm_immediate_exec) {
3898 __kmp_wait_to_unref_task_teams();
3899 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003900
Jonathan Peyton30419822017-05-12 18:01:32 +00003901#if KMP_OS_WINDOWS
3902 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3903 KA_TRACE(
3904 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3905 "\n",
3906 (LPVOID) & (root->r.r_uber_thread->th),
3907 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3908 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3909#endif /* KMP_OS_WINDOWS */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003910
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003911#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003912 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3913 int gtid = __kmp_get_gtid();
3914 __ompt_thread_end(ompt_thread_initial, gtid);
3915 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003916#endif
3917
Jonathan Peyton30419822017-05-12 18:01:32 +00003918 TCW_4(__kmp_nth,
3919 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
Jonathan Peytonf4392462017-07-27 20:58:41 +00003920 root->r.r_cg_nthreads--;
3921
Jonathan Peyton30419822017-05-12 18:01:32 +00003922 __kmp_reap_thread(root->r.r_uber_thread, 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003923
Jonathan Peyton30419822017-05-12 18:01:32 +00003924 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it
3925 // instead of freeing it.
3926 root->r.r_uber_thread = NULL;
3927 /* mark root as no longer in use */
3928 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003929
Jonathan Peyton30419822017-05-12 18:01:32 +00003930 return n;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003931}
3932
Jonathan Peyton30419822017-05-12 18:01:32 +00003933void __kmp_unregister_root_current_thread(int gtid) {
3934 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3935 /* this lock should be ok, since unregister_root_current_thread is never
3936 called during an abort, only during a normal close. furthermore, if you
3937 have the forkjoin lock, you should never try to get the initz lock */
3938 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3939 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3940 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
3941 "exiting T#%d\n",
3942 gtid));
3943 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3944 return;
3945 }
3946 kmp_root_t *root = __kmp_root[gtid];
Jim Cownie77c2a632014-09-03 11:34:33 +00003947
Jonathan Peyton30419822017-05-12 18:01:32 +00003948 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3949 KMP_ASSERT(KMP_UBER_GTID(gtid));
3950 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3951 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003952
Jonathan Peyton30419822017-05-12 18:01:32 +00003953 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003954
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003955#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003956 kmp_info_t *thread = __kmp_threads[gtid];
3957 kmp_team_t *team = thread->th.th_team;
3958 kmp_task_team_t *task_team = thread->th.th_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003959
Jonathan Peyton30419822017-05-12 18:01:32 +00003960 // we need to wait for the proxy tasks before finishing the thread
3961 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003962#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003963 // the runtime is shutting down so we won't report any events
3964 thread->th.ompt_thread_info.state = ompt_state_undefined;
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003965#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003966 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3967 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003968#endif
3969
Jonathan Peyton30419822017-05-12 18:01:32 +00003970 __kmp_reset_root(gtid, root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003971
Jonathan Peyton30419822017-05-12 18:01:32 +00003972 /* free up this thread slot */
3973 __kmp_gtid_set_specific(KMP_GTID_DNE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003974#ifdef KMP_TDATA_GTID
Jonathan Peyton30419822017-05-12 18:01:32 +00003975 __kmp_gtid = KMP_GTID_DNE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003976#endif
3977
Jonathan Peyton30419822017-05-12 18:01:32 +00003978 KMP_MB();
3979 KC_TRACE(10,
3980 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003981
Jonathan Peyton30419822017-05-12 18:01:32 +00003982 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003983}
3984
Jonathan Peyton2321d572015-06-08 19:25:25 +00003985#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003986/* __kmp_forkjoin_lock must be already held
Jonathan Peyton30419822017-05-12 18:01:32 +00003987 Unregisters a root thread that is not the current thread. Returns the number
3988 of __kmp_threads entries freed as a result. */
3989static int __kmp_unregister_root_other_thread(int gtid) {
3990 kmp_root_t *root = __kmp_root[gtid];
3991 int r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003992
Jonathan Peyton30419822017-05-12 18:01:32 +00003993 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
3994 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3995 KMP_ASSERT(KMP_UBER_GTID(gtid));
3996 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3997 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003998
Jonathan Peyton30419822017-05-12 18:01:32 +00003999 r = __kmp_reset_root(gtid, root);
4000 KC_TRACE(10,
4001 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4002 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004003}
Jonathan Peyton2321d572015-06-08 19:25:25 +00004004#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004005
Jim Cownie5e8470a2013-09-27 10:38:44 +00004006#if KMP_DEBUG
4007void __kmp_task_info() {
4008
Jonathan Peyton30419822017-05-12 18:01:32 +00004009 kmp_int32 gtid = __kmp_entry_gtid();
4010 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4011 kmp_info_t *this_thr = __kmp_threads[gtid];
4012 kmp_team_t *steam = this_thr->th.th_serial_team;
4013 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004014
Jonathan Peyton30419822017-05-12 18:01:32 +00004015 __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
4016 "ptask=%p\n",
4017 gtid, tid, this_thr, team, this_thr->th.th_current_task,
4018 team->t.t_implicit_task_taskdata[tid].td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004019}
4020#endif // KMP_DEBUG
4021
Jonathan Peyton30419822017-05-12 18:01:32 +00004022/* TODO optimize with one big memclr, take out what isn't needed, split
4023 responsibility to workers as much as possible, and delay initialization of
4024 features as much as possible */
4025static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4026 int tid, int gtid) {
4027 /* this_thr->th.th_info.ds.ds_gtid is setup in
4028 kmp_allocate_thread/create_worker.
4029 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4030 kmp_info_t *master = team->t.t_threads[0];
4031 KMP_DEBUG_ASSERT(this_thr != NULL);
4032 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4033 KMP_DEBUG_ASSERT(team);
4034 KMP_DEBUG_ASSERT(team->t.t_threads);
4035 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4036 KMP_DEBUG_ASSERT(master);
4037 KMP_DEBUG_ASSERT(master->th.th_root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004038
Jonathan Peyton30419822017-05-12 18:01:32 +00004039 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004040
Jonathan Peyton30419822017-05-12 18:01:32 +00004041 TCW_SYNC_PTR(this_thr->th.th_team, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004042
Jonathan Peyton30419822017-05-12 18:01:32 +00004043 this_thr->th.th_info.ds.ds_tid = tid;
4044 this_thr->th.th_set_nproc = 0;
4045 if (__kmp_tasking_mode != tskm_immediate_exec)
4046 // When tasking is possible, threads are not safe to reap until they are
4047 // done tasking; this will be set when tasking code is exited in wait
4048 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4049 else // no tasking --> always safe to reap
4050 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004051#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004052 this_thr->th.th_set_proc_bind = proc_bind_default;
4053#if KMP_AFFINITY_SUPPORTED
4054 this_thr->th.th_new_place = this_thr->th.th_current_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004055#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004056#endif
4057 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004058
Jonathan Peyton30419822017-05-12 18:01:32 +00004059 /* setup the thread's cache of the team structure */
4060 this_thr->th.th_team_nproc = team->t.t_nproc;
4061 this_thr->th.th_team_master = master;
4062 this_thr->th.th_team_serialized = team->t.t_serialized;
4063 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004064
Jonathan Peyton30419822017-05-12 18:01:32 +00004065 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004066
Jonathan Peyton30419822017-05-12 18:01:32 +00004067 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4068 tid, gtid, this_thr, this_thr->th.th_current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004069
Jonathan Peyton30419822017-05-12 18:01:32 +00004070 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4071 team, tid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004072
Jonathan Peyton30419822017-05-12 18:01:32 +00004073 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4074 tid, gtid, this_thr, this_thr->th.th_current_task));
4075 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4076 // __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004077
Jonathan Peyton30419822017-05-12 18:01:32 +00004078 /* TODO no worksharing in speculative threads */
4079 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004080
Jonathan Peyton30419822017-05-12 18:01:32 +00004081 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004082
Jonathan Peyton30419822017-05-12 18:01:32 +00004083 if (!this_thr->th.th_pri_common) {
4084 this_thr->th.th_pri_common =
4085 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4086 if (__kmp_storage_map) {
4087 __kmp_print_storage_map_gtid(
4088 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4089 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004090 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004091 this_thr->th.th_pri_head = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004092 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004093
Jonathan Peyton30419822017-05-12 18:01:32 +00004094 /* Initialize dynamic dispatch */
4095 {
4096 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4097 // Use team max_nproc since this will never change for the team.
4098 size_t disp_size =
4099 sizeof(dispatch_private_info_t) *
4100 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4101 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4102 team->t.t_max_nproc));
4103 KMP_ASSERT(dispatch);
4104 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4105 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004106
Jonathan Peyton30419822017-05-12 18:01:32 +00004107 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004108#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004109 dispatch->th_doacross_buf_idx = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004110#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004111 if (!dispatch->th_disp_buffer) {
4112 dispatch->th_disp_buffer =
4113 (dispatch_private_info_t *)__kmp_allocate(disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004114
Jonathan Peyton30419822017-05-12 18:01:32 +00004115 if (__kmp_storage_map) {
4116 __kmp_print_storage_map_gtid(
4117 gtid, &dispatch->th_disp_buffer[0],
4118 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4119 ? 1
4120 : __kmp_dispatch_num_buffers],
4121 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4122 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4123 gtid, team->t.t_id, gtid);
4124 }
4125 } else {
4126 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004127 }
4128
Jonathan Peyton30419822017-05-12 18:01:32 +00004129 dispatch->th_dispatch_pr_current = 0;
4130 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004131
Jonathan Peyton30419822017-05-12 18:01:32 +00004132 dispatch->th_deo_fcn = 0; /* ORDERED */
4133 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4134 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004135
Jonathan Peyton30419822017-05-12 18:01:32 +00004136 this_thr->th.th_next_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004137
Jonathan Peyton30419822017-05-12 18:01:32 +00004138 if (!this_thr->th.th_task_state_memo_stack) {
4139 size_t i;
4140 this_thr->th.th_task_state_memo_stack =
4141 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4142 this_thr->th.th_task_state_top = 0;
4143 this_thr->th.th_task_state_stack_sz = 4;
4144 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4145 ++i) // zero init the stack
4146 this_thr->th.th_task_state_memo_stack[i] = 0;
4147 }
4148
4149 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4150 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4151
4152 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004153}
4154
Jonathan Peyton30419822017-05-12 18:01:32 +00004155/* allocate a new thread for the requesting team. this is only called from
4156 within a forkjoin critical section. we will first try to get an available
4157 thread from the thread pool. if none is available, we will fork a new one
4158 assuming we are able to create a new one. this should be assured, as the
4159 caller should check on this first. */
4160kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4161 int new_tid) {
4162 kmp_team_t *serial_team;
4163 kmp_info_t *new_thr;
4164 int new_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004165
Jonathan Peyton30419822017-05-12 18:01:32 +00004166 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4167 KMP_DEBUG_ASSERT(root && team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004168#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004169 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004170#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004171 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004172
Jonathan Peyton30419822017-05-12 18:01:32 +00004173 /* first, try to get one from the thread pool */
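  // The pool is a linked list headed by __kmp_thread_pool and chained through
  // th.th_next_pool; popping the head below reuses an already-created worker
  // (keeping its gtid) instead of forking a new OS thread.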
4174 if (__kmp_thread_pool) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004175
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004176 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00004177 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4178 if (new_thr == __kmp_thread_pool_insert_pt) {
4179 __kmp_thread_pool_insert_pt = NULL;
4180 }
4181 TCW_4(new_thr->th.th_in_pool, FALSE);
4182 // Don't touch th_active_in_pool or th_active.
4183 // The worker thread adjusts those flags as it sleeps/awakens.
4184 __kmp_thread_pool_nth--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004185
Jonathan Peyton30419822017-05-12 18:01:32 +00004186 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4187 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4188 KMP_ASSERT(!new_thr->th.th_team);
4189 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4190 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004191
Jonathan Peyton30419822017-05-12 18:01:32 +00004192 /* setup the thread structure */
4193 __kmp_initialize_info(new_thr, team, new_tid,
4194 new_thr->th.th_info.ds.ds_gtid);
4195 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004196
Jonathan Peyton30419822017-05-12 18:01:32 +00004197 TCW_4(__kmp_nth, __kmp_nth + 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00004198 root->r.r_cg_nthreads++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004199
Jonathan Peyton30419822017-05-12 18:01:32 +00004200 new_thr->th.th_task_state = 0;
4201 new_thr->th.th_task_state_top = 0;
4202 new_thr->th.th_task_state_stack_sz = 4;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004203
Jim Cownie5e8470a2013-09-27 10:38:44 +00004204#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00004205 /* Adjust blocktime back to zero if necessary */
4206 /* Middle initialization might not have occurred yet */
4207 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4208 if (__kmp_nth > __kmp_avail_proc) {
4209 __kmp_zero_bt = TRUE;
4210 }
4211 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004212#endif /* KMP_ADJUST_BLOCKTIME */
4213
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004214#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004215 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4216 // KMP_BARRIER_PARENT_FLAG.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004217 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004218 kmp_balign_t *balign = new_thr->th.th_bar;
4219 for (b = 0; b < bs_last_barrier; ++b)
4220 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004221#endif
4222
Jonathan Peyton30419822017-05-12 18:01:32 +00004223 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4224 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004225
Jim Cownie5e8470a2013-09-27 10:38:44 +00004226 KMP_MB();
4227 return new_thr;
Jonathan Peyton30419822017-05-12 18:01:32 +00004228 }
4229
4230 /* no, we'll fork a new one */
4231 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4232 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4233
4234#if KMP_USE_MONITOR
4235 // If this is the first worker thread the RTL is creating, then also
4236 // launch the monitor thread. We try to do this as early as possible.
4237 if (!TCR_4(__kmp_init_monitor)) {
4238 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4239 if (!TCR_4(__kmp_init_monitor)) {
4240 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4241 TCW_4(__kmp_init_monitor, 1);
4242 __kmp_create_monitor(&__kmp_monitor);
4243 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4244#if KMP_OS_WINDOWS
4245 // AC: wait until monitor has started. This is a fix for CQ232808.
4246 // The reason is that if the library is loaded/unloaded in a loop with
4247 // small (parallel) work in between, then there is high probability that
4248 // monitor thread started after the library shutdown. At shutdown it is
4249 // too late to cope with the problem, because when the master is in
4250 // DllMain (process detach) the monitor has no chances to start (it is
4251 // blocked), and master has no means to inform the monitor that the
4252 // library has gone, because all the memory which the monitor can access
4253 // is going to be released/reset.
4254 while (TCR_4(__kmp_init_monitor) < 2) {
4255 KMP_YIELD(TRUE);
4256 }
4257 KF_TRACE(10, ("after monitor thread has started\n"));
4258#endif
4259 }
4260 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4261 }
4262#endif
4263
4264 KMP_MB();
4265 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4266 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4267 }
4268
4269 /* allocate space for it. */
4270 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4271
4272 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4273
4274 if (__kmp_storage_map) {
4275 __kmp_print_thread_storage_map(new_thr, new_gtid);
4276 }
4277
4278 // add the reserve serialized team, initialized from the team's master thread
4279 {
4280 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4281 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4282 new_thr->th.th_serial_team = serial_team =
4283 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4284#if OMPT_SUPPORT
4285 0, // root parallel id
4286#endif
4287#if OMP_40_ENABLED
4288 proc_bind_default,
4289#endif
4290 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4291 }
4292 KMP_ASSERT(serial_team);
4293 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4294 // execution (it is unused for now).
4295 serial_team->t.t_threads[0] = new_thr;
4296 KF_TRACE(10,
4297 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4298 new_thr));
4299
4300 /* setup the thread structures */
4301 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4302
4303#if USE_FAST_MEMORY
4304 __kmp_initialize_fast_memory(new_thr);
4305#endif /* USE_FAST_MEMORY */
4306
4307#if KMP_USE_BGET
4308 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4309 __kmp_initialize_bget(new_thr);
4310#endif
4311
4312 __kmp_init_random(new_thr); // Initialize random number generator
4313
4314 /* Initialize these only once when thread is grabbed for a team allocation */
4315 KA_TRACE(20,
4316 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4317 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4318
4319 int b;
4320 kmp_balign_t *balign = new_thr->th.th_bar;
4321 for (b = 0; b < bs_last_barrier; ++b) {
4322 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4323 balign[b].bb.team = NULL;
4324 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4325 balign[b].bb.use_oncore_barrier = 0;
4326 }
4327
4328 new_thr->th.th_spin_here = FALSE;
4329 new_thr->th.th_next_waiting = 0;
4330
4331#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4332 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4333 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4334 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4335 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4336#endif
4337
4338 TCW_4(new_thr->th.th_in_pool, FALSE);
4339 new_thr->th.th_active_in_pool = FALSE;
4340 TCW_4(new_thr->th.th_active, TRUE);
4341
4342 /* adjust the global counters */
4343 __kmp_all_nth++;
4344 __kmp_nth++;
4345
Jonathan Peytonf4392462017-07-27 20:58:41 +00004346 root->r.r_cg_nthreads++;
4347
Jonathan Peyton30419822017-05-12 18:01:32 +00004348 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4349 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4350 if (__kmp_adjust_gtid_mode) {
4351 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4352 if (TCR_4(__kmp_gtid_mode) != 2) {
4353 TCW_4(__kmp_gtid_mode, 2);
4354 }
4355 } else {
4356 if (TCR_4(__kmp_gtid_mode) != 1) {
4357 TCW_4(__kmp_gtid_mode, 1);
4358 }
4359 }
4360 }
4361
4362#ifdef KMP_ADJUST_BLOCKTIME
4363 /* Adjust blocktime back to zero if necessary */
4364 /* Middle initialization might not have occurred yet */
4365 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4366 if (__kmp_nth > __kmp_avail_proc) {
4367 __kmp_zero_bt = TRUE;
4368 }
4369 }
4370#endif /* KMP_ADJUST_BLOCKTIME */
4371
4372 /* actually fork it and create the new worker thread */
4373 KF_TRACE(
4374 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4375 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4376 KF_TRACE(10,
4377 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4378
4379 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4380 new_gtid));
4381 KMP_MB();
4382 return new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004383}
4384
Jonathan Peyton30419822017-05-12 18:01:32 +00004385/* Reinitialize team for reuse.
4386 The hot team code calls this case at every fork barrier, so EPCC barrier
4387 test are extremely sensitive to changes in it, esp. writes to the team
4388 struct, which cause a cache invalidation in all threads.
4389 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4390static void __kmp_reinitialize_team(kmp_team_t *team,
4391 kmp_internal_control_t *new_icvs,
4392 ident_t *loc) {
4393 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4394 team->t.t_threads[0], team));
4395 KMP_DEBUG_ASSERT(team && new_icvs);
4396 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4397 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004398
Jonathan Peyton30419822017-05-12 18:01:32 +00004399 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jonathan Peyton30419822017-05-12 18:01:32 +00004400 // Copy ICVs to the master thread's implicit taskdata
4401 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4402 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004403
Jonathan Peyton30419822017-05-12 18:01:32 +00004404 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4405 team->t.t_threads[0], team));
Jim Cownie181b4bb2013-12-23 17:28:57 +00004406}
4407
Jonathan Peyton30419822017-05-12 18:01:32 +00004408/* Initialize the team data structure.
4409 This assumes the t_threads and t_max_nproc are already set.
4410 Also, we don't touch the arguments */
4411static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4412 kmp_internal_control_t *new_icvs,
4413 ident_t *loc) {
4414 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004415
Jonathan Peyton30419822017-05-12 18:01:32 +00004416 /* verify */
4417 KMP_DEBUG_ASSERT(team);
4418 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4419 KMP_DEBUG_ASSERT(team->t.t_threads);
4420 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004421
Jonathan Peyton30419822017-05-12 18:01:32 +00004422 team->t.t_master_tid = 0; /* not needed */
4423 /* team->t.t_master_bar; not needed */
4424 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4425 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004426
Jonathan Peyton30419822017-05-12 18:01:32 +00004427 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4428 team->t.t_next_pool = NULL;
4429 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4430 * up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004431
Jonathan Peyton30419822017-05-12 18:01:32 +00004432 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4433 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004434
Jonathan Peyton30419822017-05-12 18:01:32 +00004435 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4436 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004437
4438#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00004439 team->t.t_fp_control_saved = FALSE; /* not needed */
4440 team->t.t_x87_fpu_control_word = 0; /* not needed */
4441 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004442#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4443
Jonathan Peyton30419822017-05-12 18:01:32 +00004444 team->t.t_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004445
Jonathan Peyton30419822017-05-12 18:01:32 +00004446 team->t.t_ordered.dt.t_value = 0;
4447 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004448
Jonathan Peyton30419822017-05-12 18:01:32 +00004449 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004450
4451#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004452 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004453#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004454 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004455
Jonathan Peyton30419822017-05-12 18:01:32 +00004456 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004457
Jonathan Peyton30419822017-05-12 18:01:32 +00004458 __kmp_reinitialize_team(team, new_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004459
Jonathan Peyton30419822017-05-12 18:01:32 +00004460 KMP_MB();
4461 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004462}
4463
Alp Toker98758b02014-03-02 04:12:06 +00004464#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004465/* Sets full mask for thread and returns old mask, no changes to structures. */
4466static void
Jonathan Peyton30419822017-05-12 18:01:32 +00004467__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4468 if (KMP_AFFINITY_CAPABLE()) {
4469 int status;
4470 if (old_mask != NULL) {
4471 status = __kmp_get_system_affinity(old_mask, TRUE);
4472 int error = errno;
4473 if (status != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00004474 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4475 __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00004476 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004477 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004478 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4479 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004480}
4481#endif
4482
Alp Toker98758b02014-03-02 04:12:06 +00004483#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004484
Jim Cownie5e8470a2013-09-27 10:38:44 +00004485// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4486// It calculats the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004487// thread's partition, and binds each worker to a thread in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004488// The master thread's partition should already include its current binding.
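// Illustrative split for the oversubscribed proc_bind_close case (a sketch,
// numbers chosen for exposition only): with n_th == 10 threads and a partition
// of n_places == 4 places, S = 10/4 = 2 and rem = 2, so consecutive places
// starting at the master's place receive 3, 2, 3 and 2 threads, the two extra
// threads landing gap = 4/2 = 2 places apart.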
Jonathan Peyton30419822017-05-12 18:01:32 +00004489static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4490 // Copy the master thread's place partition to the team struct
4491 kmp_info_t *master_th = team->t.t_threads[0];
4492 KMP_DEBUG_ASSERT(master_th != NULL);
4493 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4494 int first_place = master_th->th.th_first_place;
4495 int last_place = master_th->th.th_last_place;
4496 int masters_place = master_th->th.th_current_place;
4497 team->t.t_first_place = first_place;
4498 team->t.t_last_place = last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004499
Jonathan Peyton30419822017-05-12 18:01:32 +00004500 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4501 "bound to place %d partition = [%d,%d]\n",
4502 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4503 team->t.t_id, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004504
Jonathan Peyton30419822017-05-12 18:01:32 +00004505 switch (proc_bind) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004506
Jonathan Peyton30419822017-05-12 18:01:32 +00004507 case proc_bind_default:
4508 // serial teams might have the proc_bind policy set to proc_bind_default. It
4509 // doesn't matter, as we don't rebind master thread for any proc_bind policy
4510 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4511 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004512
Jonathan Peyton30419822017-05-12 18:01:32 +00004513 case proc_bind_master: {
4514 int f;
4515 int n_th = team->t.t_nproc;
4516 for (f = 1; f < n_th; f++) {
4517 kmp_info_t *th = team->t.t_threads[f];
4518 KMP_DEBUG_ASSERT(th != NULL);
4519 th->th.th_first_place = first_place;
4520 th->th.th_last_place = last_place;
4521 th->th.th_new_place = masters_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004522
Jonathan Peyton30419822017-05-12 18:01:32 +00004523 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
4524 "partition = [%d,%d]\n",
4525 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4526 f, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004527 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004528 } break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004529
Jonathan Peyton30419822017-05-12 18:01:32 +00004530 case proc_bind_close: {
4531 int f;
4532 int n_th = team->t.t_nproc;
4533 int n_places;
4534 if (first_place <= last_place) {
4535 n_places = last_place - first_place + 1;
4536 } else {
4537 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4538 }
4539 if (n_th <= n_places) {
4540 int place = masters_place;
4541 for (f = 1; f < n_th; f++) {
4542 kmp_info_t *th = team->t.t_threads[f];
4543 KMP_DEBUG_ASSERT(th != NULL);
4544
4545 if (place == last_place) {
4546 place = first_place;
4547 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4548 place = 0;
4549 } else {
4550 place++;
4551 }
4552 th->th.th_first_place = first_place;
4553 th->th.th_last_place = last_place;
4554 th->th.th_new_place = place;
4555
4556 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4557 "partition = [%d,%d]\n",
4558 __kmp_gtid_from_thread(team->t.t_threads[f]),
4559 team->t.t_id, f, place, first_place, last_place));
4560 }
4561 } else {
4562 int S, rem, gap, s_count;
4563 S = n_th / n_places;
4564 s_count = 0;
4565 rem = n_th - (S * n_places);
4566 gap = rem > 0 ? n_places / rem : n_places;
4567 int place = masters_place;
4568 int gap_ct = gap;
4569 for (f = 0; f < n_th; f++) {
4570 kmp_info_t *th = team->t.t_threads[f];
4571 KMP_DEBUG_ASSERT(th != NULL);
4572
4573 th->th.th_first_place = first_place;
4574 th->th.th_last_place = last_place;
4575 th->th.th_new_place = place;
4576 s_count++;
4577
4578 if ((s_count == S) && rem && (gap_ct == gap)) {
4579 // do nothing, add an extra thread to place on next iteration
4580 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4581 // we added an extra thread to this place; move to next place
4582 if (place == last_place) {
4583 place = first_place;
4584 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4585 place = 0;
4586 } else {
4587 place++;
4588 }
4589 s_count = 0;
4590 gap_ct = 1;
4591 rem--;
4592 } else if (s_count == S) { // place full; don't add extra
4593 if (place == last_place) {
4594 place = first_place;
4595 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4596 place = 0;
4597 } else {
4598 place++;
4599 }
4600 gap_ct++;
4601 s_count = 0;
4602 }
4603
4604 KA_TRACE(100,
4605 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4606 "partition = [%d,%d]\n",
4607 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4608 th->th.th_new_place, first_place, last_place));
4609 }
4610 KMP_DEBUG_ASSERT(place == masters_place);
4611 }
4612 } break;
4613
4614 case proc_bind_spread: {
4615 int f;
4616 int n_th = team->t.t_nproc;
4617 int n_places;
4618 int thidx;
4619 if (first_place <= last_place) {
4620 n_places = last_place - first_place + 1;
4621 } else {
4622 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4623 }
4624 if (n_th <= n_places) {
Paul Osmialowskia0162792017-08-10 23:04:11 +00004625 int place = -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004626
Paul Osmialowskia0162792017-08-10 23:04:11 +00004627 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4628 int S = n_places / n_th;
4629 int s_count, rem, gap, gap_ct;
4630
4631 place = masters_place;
4632 rem = n_places - n_th * S;
4633 gap = rem ? n_th / rem : 1;
4634 gap_ct = gap;
4635 thidx = n_th;
4636 if (update_master_only == 1)
4637 thidx = 1;
4638 for (f = 0; f < thidx; f++) {
4639 kmp_info_t *th = team->t.t_threads[f];
4640 KMP_DEBUG_ASSERT(th != NULL);
4641
4642 th->th.th_first_place = place;
4643 th->th.th_new_place = place;
4644 s_count = 1;
4645 while (s_count < S) {
4646 if (place == last_place) {
4647 place = first_place;
4648 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4649 place = 0;
4650 } else {
4651 place++;
4652 }
4653 s_count++;
4654 }
4655 if (rem && (gap_ct == gap)) {
4656 if (place == last_place) {
4657 place = first_place;
4658 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4659 place = 0;
4660 } else {
4661 place++;
4662 }
4663 rem--;
4664 gap_ct = 0;
4665 }
4666 th->th.th_last_place = place;
4667 gap_ct++;
4668
Jonathan Peyton30419822017-05-12 18:01:32 +00004669 if (place == last_place) {
4670 place = first_place;
4671 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4672 place = 0;
4673 } else {
4674 place++;
4675 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004676
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004677 KA_TRACE(100,
4678 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4679 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4680 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4681 f, th->th.th_new_place, th->th.th_first_place,
4682 th->th.th_last_place, __kmp_affinity_num_masks));
Jonathan Peyton30419822017-05-12 18:01:32 +00004683 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004684 } else {
4685 /* Having uniform space of available computation places I can create
4686 T partitions of round(P/T) size and put threads into the first
4687 place of each partition. */
4688 double current = static_cast<double>(masters_place);
4689 double spacing =
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004690 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004691 int first, last;
4692 kmp_info_t *th;
4693
4694 thidx = n_th + 1;
4695 if (update_master_only == 1)
4696 thidx = 1;
4697 for (f = 0; f < thidx; f++) {
4698 first = static_cast<int>(current);
4699 last = static_cast<int>(current + spacing) - 1;
4700 KMP_DEBUG_ASSERT(last >= first);
4701 if (first >= n_places) {
4702 if (masters_place) {
4703 first -= n_places;
4704 last -= n_places;
4705 if (first == (masters_place + 1)) {
4706 KMP_DEBUG_ASSERT(f == n_th);
4707 first--;
4708 }
4709 if (last == masters_place) {
4710 KMP_DEBUG_ASSERT(f == (n_th - 1));
4711 last--;
4712 }
4713 } else {
4714 KMP_DEBUG_ASSERT(f == n_th);
4715 first = 0;
4716 last = 0;
4717 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004718 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004719 if (last >= n_places) {
4720 last = (n_places - 1);
4721 }
4722 place = first;
4723 current += spacing;
4724 if (f < n_th) {
4725 KMP_DEBUG_ASSERT(0 <= first);
4726 KMP_DEBUG_ASSERT(n_places > first);
4727 KMP_DEBUG_ASSERT(0 <= last);
4728 KMP_DEBUG_ASSERT(n_places > last);
4729 KMP_DEBUG_ASSERT(last_place >= first_place);
4730 th = team->t.t_threads[f];
4731 KMP_DEBUG_ASSERT(th);
4732 th->th.th_first_place = first;
4733 th->th.th_new_place = place;
4734 th->th.th_last_place = last;
Jonathan Peyton30419822017-05-12 18:01:32 +00004735
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004736 KA_TRACE(100,
4737 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4738 "partition = [%d,%d], spacing = %.4f\n",
4739 __kmp_gtid_from_thread(team->t.t_threads[f]),
4740 team->t.t_id, f, th->th.th_new_place,
4741 th->th.th_first_place, th->th.th_last_place, spacing));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004742 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004743 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004744 }
4745 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4746 } else {
4747 int S, rem, gap, s_count;
4748 S = n_th / n_places;
4749 s_count = 0;
4750 rem = n_th - (S * n_places);
4751 gap = rem > 0 ? n_places / rem : n_places;
4752 int place = masters_place;
4753 int gap_ct = gap;
4754 thidx = n_th;
4755 if (update_master_only == 1)
4756 thidx = 1;
4757 for (f = 0; f < thidx; f++) {
4758 kmp_info_t *th = team->t.t_threads[f];
4759 KMP_DEBUG_ASSERT(th != NULL);
4760
4761 th->th.th_first_place = place;
4762 th->th.th_last_place = place;
4763 th->th.th_new_place = place;
4764 s_count++;
4765
4766 if ((s_count == S) && rem && (gap_ct == gap)) {
4767 // do nothing, add an extra thread to place on next iteration
4768 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4769 // we added an extra thread to this place; move on to next place
4770 if (place == last_place) {
4771 place = first_place;
4772 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4773 place = 0;
4774 } else {
4775 place++;
4776 }
4777 s_count = 0;
4778 gap_ct = 1;
4779 rem--;
4780 } else if (s_count == S) { // place is full; don't add extra thread
4781 if (place == last_place) {
4782 place = first_place;
4783 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4784 place = 0;
4785 } else {
4786 place++;
4787 }
4788 gap_ct++;
4789 s_count = 0;
4790 }
4791
4792 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4793 "partition = [%d,%d]\n",
4794 __kmp_gtid_from_thread(team->t.t_threads[f]),
4795 team->t.t_id, f, th->th.th_new_place,
4796 th->th.th_first_place, th->th.th_last_place));
4797 }
4798 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4799 }
4800 } break;
4801
4802 default:
4803 break;
4804 }
4805
4806 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004807}
4808
Alp Toker98758b02014-03-02 04:12:06 +00004809#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
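/* Worked example of the distribution arithmetic used above (a sketch of the
   oversubscribed close/spread case, assuming masters_place is place 0):

     n_th = 10 threads, n_places = 4 places
     S   = n_th / n_places       = 2   // base threads per place
     rem = n_th - S * n_places   = 2   // places that receive one extra thread
     gap = n_places / rem        = 2   // every gap-th place takes the extra

   Walking the loop yields 3,2,3,2 threads on places 0..3 and leaves the
   running place index back at masters_place, which is what the
   KMP_DEBUG_ASSERT at the end of each branch checks.

   For proc_bind_spread with n_th <= n_places over a uniform place space,
   e.g. n_places = 8 and n_th = 4: spacing = (8 + 1) / 4 = 2.25, so the
   partitions come out as [0,1], [2,3], [4,5], [6,7], with each thread bound
   to the first place of its partition. */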
Jim Cownie5e8470a2013-09-27 10:38:44 +00004810
Jonathan Peyton30419822017-05-12 18:01:32 +00004811/* allocate a new team data structure to use. take one off of the free pool if
4812 available */
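/* Reading aid: the control flow of __kmp_allocate_team below, reduced to a
   sketch (this adds no behavior; it only summarizes the branches that
   follow):

     if (use_hot_team && new_nproc > 1) {
       // reuse the hot team: keep, shrink, or grow it, then re-partition
       // places and propagate ICVs / teams-construct info
       return hot team;
     }
     for (team in __kmp_team_pool) {
       if (team->t.t_max_nproc >= max_nproc)
         return recycled pooled team;  // reinit ICVs, barriers, argv space
       // otherwise reap the undersized team and keep scanning
     }
     return freshly allocated kmp_team_t;  // new arrays, barriers, argv
*/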
Jim Cownie5e8470a2013-09-27 10:38:44 +00004813kmp_team_t *
Jonathan Peyton30419822017-05-12 18:01:32 +00004814__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004815#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00004816 ompt_parallel_id_t ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004817#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004818#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004819 kmp_proc_bind_t new_proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00004820#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004821 kmp_internal_control_t *new_icvs,
4822 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4823 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4824 int f;
4825 kmp_team_t *team;
4826 int use_hot_team = !root->r.r_active;
4827 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004828
Jonathan Peyton30419822017-05-12 18:01:32 +00004829 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4830 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4831 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4832 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004833
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004834#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004835 kmp_hot_team_ptr_t *hot_teams;
4836 if (master) {
4837 team = master->th.th_team;
4838 level = team->t.t_active_level;
4839 if (master->th.th_teams_microtask) { // in teams construct?
4840 if (master->th.th_teams_size.nteams > 1 &&
4841 ( // #teams > 1
4842 team->t.t_pkfn ==
4843 (microtask_t)__kmp_teams_master || // inner fork of the teams
4844 master->th.th_teams_level <
4845 team->t.t_level)) { // or nested parallel inside the teams
4846 ++level; // don't increment if #teams==1, or for the outer fork of the
4847 // teams; increment otherwise
4848 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004849 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004850 hot_teams = master->th.th_hot_teams;
4851 if (level < __kmp_hot_teams_max_level && hot_teams &&
4852 hot_teams[level]
4853 .hot_team) { // hot team has already been allocated for given level
4854 use_hot_team = 1;
4855 } else {
4856 use_hot_team = 0;
4857 }
4858 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004859#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004860 // Optimization to use a "hot" team
4861 if (use_hot_team && new_nproc > 1) {
4862 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004863#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004864 team = hot_teams[level].hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004865#else
Jonathan Peyton30419822017-05-12 18:01:32 +00004866 team = root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004867#endif
4868#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004869 if (__kmp_tasking_mode != tskm_immediate_exec) {
4870 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
4871 "task_team[1] = %p before reinit\n",
4872 team->t.t_task_team[0], team->t.t_task_team[1]));
4873 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004874#endif
4875
Jonathan Peyton30419822017-05-12 18:01:32 +00004876 // Has the number of threads changed?
4877 /* Let's assume the most common case is that the number of threads is
4878 unchanged, and put that case first. */
4879 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4880 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
4881 // This case can mean that omp_set_num_threads() was called and the hot
Jonathan Peyton642688b2017-06-01 16:46:36 +00004882 // team size was already reduced, so we check the special flag
Jonathan Peyton30419822017-05-12 18:01:32 +00004883 if (team->t.t_size_changed == -1) {
4884 team->t.t_size_changed = 1;
4885 } else {
4886 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4887 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004888
Jonathan Peyton30419822017-05-12 18:01:32 +00004889 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4890 kmp_r_sched_t new_sched = new_icvs->sched;
4891 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4892 team->t.t_sched.chunk != new_sched.chunk)
4893 team->t.t_sched =
4894 new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004895
Jonathan Peyton30419822017-05-12 18:01:32 +00004896 __kmp_reinitialize_team(team, new_icvs,
4897 root->r.r_uber_thread->th.th_ident);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004898
Jonathan Peyton30419822017-05-12 18:01:32 +00004899 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4900 team->t.t_threads[0], team));
4901 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004902
4903#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004904#if KMP_AFFINITY_SUPPORTED
4905 if ((team->t.t_size_changed == 0) &&
4906 (team->t.t_proc_bind == new_proc_bind)) {
4907 if (new_proc_bind == proc_bind_spread) {
4908 __kmp_partition_places(
4909 team, 1); // add flag to update only master for spread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004910 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004911 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
4912 "proc_bind = %d, partition = [%d,%d]\n",
4913 team->t.t_id, new_proc_bind, team->t.t_first_place,
4914 team->t.t_last_place));
4915 } else {
4916 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4917 __kmp_partition_places(team);
4918 }
4919#else
4920 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4921#endif /* KMP_AFFINITY_SUPPORTED */
4922#endif /* OMP_40_ENABLED */
4923 } else if (team->t.t_nproc > new_nproc) {
4924 KA_TRACE(20,
4925 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
4926 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004927
Jonathan Peyton30419822017-05-12 18:01:32 +00004928 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004929#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004930 if (__kmp_hot_teams_mode == 0) {
4931 // AC: saved number of threads should correspond to team's value in this
4932 // mode, can be bigger in mode 1, when hot team has threads in reserve
4933 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4934 hot_teams[level].hot_team_nth = new_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004935#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004936 /* release the extra threads we don't need any more */
4937 for (f = new_nproc; f < team->t.t_nproc; f++) {
4938 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4939 if (__kmp_tasking_mode != tskm_immediate_exec) {
4940 // When decreasing team size, threads no longer in the team should
4941 // unref task team.
4942 team->t.t_threads[f]->th.th_task_team = NULL;
4943 }
4944 __kmp_free_thread(team->t.t_threads[f]);
4945 team->t.t_threads[f] = NULL;
4946 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004947#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004948 } // (__kmp_hot_teams_mode == 0)
4949 else {
4950 // When keeping extra threads in team, switch threads to wait on own
4951 // b_go flag
4952 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4953 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4954 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4955 for (int b = 0; b < bs_last_barrier; ++b) {
4956 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4957 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004958 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004959 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4960 }
4961 }
4962 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004963#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004964 team->t.t_nproc = new_nproc;
4965 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4966 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4967 team->t.t_sched.chunk != new_icvs->sched.chunk)
4968 team->t.t_sched = new_icvs->sched;
4969 __kmp_reinitialize_team(team, new_icvs,
4970 root->r.r_uber_thread->th.th_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004971
Jonathan Peyton30419822017-05-12 18:01:32 +00004972 /* update the remaining threads */
4973 for (f = 0; f < new_nproc; ++f) {
4974 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4975 }
4976 // restore the current task state of the master thread: should be the
4977 // implicit task
4978 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
4979 team->t.t_threads[0], team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004980
Jonathan Peyton30419822017-05-12 18:01:32 +00004981 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004982
4983#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004984 for (f = 0; f < team->t.t_nproc; f++) {
4985 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
4986 team->t.t_threads[f]->th.th_team_nproc ==
4987 team->t.t_nproc);
4988 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004989#endif
4990
4991#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004992 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4993#if KMP_AFFINITY_SUPPORTED
4994 __kmp_partition_places(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004995#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004996#endif
4997 } else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004998#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00004999 kmp_affin_mask_t *old_mask;
5000 if (KMP_AFFINITY_CAPABLE()) {
5001 KMP_CPU_ALLOC(old_mask);
5002 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005003#endif
5004
Jonathan Peyton30419822017-05-12 18:01:32 +00005005 KA_TRACE(20,
5006 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5007 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005008
Jonathan Peyton30419822017-05-12 18:01:32 +00005009 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005010
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005011#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005012 int avail_threads = hot_teams[level].hot_team_nth;
5013 if (new_nproc < avail_threads)
5014 avail_threads = new_nproc;
5015 kmp_info_t **other_threads = team->t.t_threads;
5016 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5017 // Adjust barrier data of reserved threads (if any) of the team
5018 // Other data will be set in __kmp_initialize_info() below.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005019 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00005020 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5021 for (b = 0; b < bs_last_barrier; ++b) {
5022 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5023 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005024#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00005025 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005026#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005027 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005028 }
5029 if (hot_teams[level].hot_team_nth >= new_nproc) {
5030 // we have all needed threads in reserve, no need to allocate any
5031 // this is only possible in mode 1; there can be no reserved threads in mode 0
5032 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5033 team->t.t_nproc = new_nproc; // just get reserved threads involved
5034 } else {
5035 // we may have some threads in reserve, but not enough
5036 team->t.t_nproc =
5037 hot_teams[level]
5038 .hot_team_nth; // get reserved threads involved if any
5039 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5040#endif // KMP_NESTED_HOT_TEAMS
5041 if (team->t.t_max_nproc < new_nproc) {
5042 /* reallocate larger arrays */
5043 __kmp_reallocate_team_arrays(team, new_nproc);
5044 __kmp_reinitialize_team(team, new_icvs, NULL);
5045 }
5046
5047#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5048 /* Temporarily set full mask for master thread before creation of
5049 workers. The reason is that workers inherit the affinity from the master,
5050 so if a lot of workers are created on a single core quickly, they
5051 don't get a chance to set their own affinity for a long time. */
5052 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5053#endif
5054
5055 /* allocate new threads for the hot team */
5056 for (f = team->t.t_nproc; f < new_nproc; f++) {
5057 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5058 KMP_DEBUG_ASSERT(new_worker);
5059 team->t.t_threads[f] = new_worker;
5060
5061 KA_TRACE(20,
5062 ("__kmp_allocate_team: team %d init T#%d arrived: "
5063 "join=%llu, plain=%llu\n",
5064 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5065 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5066 team->t.t_bar[bs_plain_barrier].b_arrived));
5067
5068 { // Initialize barrier data for new threads.
5069 int b;
5070 kmp_balign_t *balign = new_worker->th.th_bar;
5071 for (b = 0; b < bs_last_barrier; ++b) {
5072 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5073 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5074 KMP_BARRIER_PARENT_FLAG);
5075#if USE_DEBUGGER
5076 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5077#endif
5078 }
5079 }
5080 }
5081
5082#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5083 if (KMP_AFFINITY_CAPABLE()) {
5084 /* Restore initial master thread's affinity mask */
5085 __kmp_set_system_affinity(old_mask, TRUE);
5086 KMP_CPU_FREE(old_mask);
5087 }
5088#endif
5089#if KMP_NESTED_HOT_TEAMS
5090 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5091#endif // KMP_NESTED_HOT_TEAMS
5092 /* make sure everyone is synchronized */
5093 int old_nproc = team->t.t_nproc; // save old value and use to update only
5094 // new threads below
5095 __kmp_initialize_team(team, new_nproc, new_icvs,
5096 root->r.r_uber_thread->th.th_ident);
5097
5098 /* reinitialize the threads */
5099 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5100 for (f = 0; f < team->t.t_nproc; ++f)
5101 __kmp_initialize_info(team->t.t_threads[f], team, f,
5102 __kmp_gtid_from_tid(f, team));
5103 if (level) { // set th_task_state for new threads in nested hot team
5104 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5105 // only need to set the th_task_state for the new threads. th_task_state
5106 // for master thread will not be accurate until after this in
5107 // __kmp_fork_call(), so we look to the master's memo_stack to get the
5108 // correct value.
5109 for (f = old_nproc; f < team->t.t_nproc; ++f)
5110 team->t.t_threads[f]->th.th_task_state =
5111 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5112 } else { // set th_task_state for new threads in non-nested hot team
5113 int old_state =
5114 team->t.t_threads[0]->th.th_task_state; // copy master's state
5115 for (f = old_nproc; f < team->t.t_nproc; ++f)
5116 team->t.t_threads[f]->th.th_task_state = old_state;
5117 }
5118
5119#ifdef KMP_DEBUG
5120 for (f = 0; f < team->t.t_nproc; ++f) {
5121 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5122 team->t.t_threads[f]->th.th_team_nproc ==
5123 team->t.t_nproc);
5124 }
5125#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005126
5127#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005128 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5129#if KMP_AFFINITY_SUPPORTED
5130 __kmp_partition_places(team);
5131#endif
5132#endif
5133 } // Check changes in number of threads
5134
5135#if OMP_40_ENABLED
5136 kmp_info_t *master = team->t.t_threads[0];
5137 if (master->th.th_teams_microtask) {
5138 for (f = 1; f < new_nproc; ++f) {
5139 // propagate teams construct specific info to workers
5140 kmp_info_t *thr = team->t.t_threads[f];
5141 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5142 thr->th.th_teams_level = master->th.th_teams_level;
5143 thr->th.th_teams_size = master->th.th_teams_size;
5144 }
5145 }
5146#endif /* OMP_40_ENABLED */
5147#if KMP_NESTED_HOT_TEAMS
5148 if (level) {
5149 // Sync barrier state for nested hot teams, not needed for outermost hot
5150 // team.
5151 for (f = 1; f < new_nproc; ++f) {
5152 kmp_info_t *thr = team->t.t_threads[f];
5153 int b;
5154 kmp_balign_t *balign = thr->th.th_bar;
5155 for (b = 0; b < bs_last_barrier; ++b) {
5156 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5157 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5158#if USE_DEBUGGER
5159 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5160#endif
5161 }
5162 }
5163 }
5164#endif // KMP_NESTED_HOT_TEAMS
5165
5166 /* reallocate space for arguments if necessary */
5167 __kmp_alloc_argv_entries(argc, team, TRUE);
5168 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5169 // The hot team re-uses the previous task team,
5170 // if untouched during the previous release->gather phase.
5171
5172 KF_TRACE(10, (" hot_team = %p\n", team));
5173
5174#if KMP_DEBUG
5175 if (__kmp_tasking_mode != tskm_immediate_exec) {
5176 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5177 "task_team[1] = %p after reinit\n",
5178 team->t.t_task_team[0], team->t.t_task_team[1]));
5179 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005180#endif
5181
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005182#if OMPT_SUPPORT
5183 __ompt_team_assign_id(team, ompt_parallel_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005184#endif
5185
Jim Cownie5e8470a2013-09-27 10:38:44 +00005186 KMP_MB();
5187
Jim Cownie5e8470a2013-09-27 10:38:44 +00005188 return team;
Jonathan Peyton30419822017-05-12 18:01:32 +00005189 }
5190
5191 /* next, let's try to take one from the team pool */
5192 KMP_MB();
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005193 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005194 /* TODO: consider resizing undersized teams instead of reaping them, now
5195 that we have a resizing mechanism */
5196 if (team->t.t_max_nproc >= max_nproc) {
5197 /* take this team from the team pool */
5198 __kmp_team_pool = team->t.t_next_pool;
5199
5200 /* setup the team for fresh use */
5201 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5202
5203 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5204 "task_team[1] %p to NULL\n",
5205 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5206 team->t.t_task_team[0] = NULL;
5207 team->t.t_task_team[1] = NULL;
5208
5209 /* reallocate space for arguments if necessary */
5210 __kmp_alloc_argv_entries(argc, team, TRUE);
5211 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5212
5213 KA_TRACE(
5214 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5215 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5216 { // Initialize barrier data.
5217 int b;
5218 for (b = 0; b < bs_last_barrier; ++b) {
5219 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5220#if USE_DEBUGGER
5221 team->t.t_bar[b].b_master_arrived = 0;
5222 team->t.t_bar[b].b_team_arrived = 0;
5223#endif
5224 }
5225 }
5226
5227#if OMP_40_ENABLED
5228 team->t.t_proc_bind = new_proc_bind;
5229#endif
5230
5231 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5232 team->t.t_id));
5233
5234#if OMPT_SUPPORT
5235 __ompt_team_assign_id(team, ompt_parallel_id);
5236#endif
5237
5238 KMP_MB();
5239
5240 return team;
5241 }
5242
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005243 /* reap team if it is too small, then loop back and check the next one */
5244 // not sure if this is wise, but it will be redone during the hot-teams
5245 // rewrite.
5246 /* TODO: Use technique to find the right size hot-team, don't reap them */
Jonathan Peyton30419822017-05-12 18:01:32 +00005247 team = __kmp_reap_team(team);
5248 __kmp_team_pool = team;
5249 }
5250
5251 /* nothing available in the pool, no matter, make a new team! */
5252 KMP_MB();
5253 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5254
5255 /* and set it up */
5256 team->t.t_max_nproc = max_nproc;
5257 /* NOTE well, for some reason allocating one big buffer and dividing it up
5258 seems to really hurt performance a lot on the P4, so let's not use this */
5259 __kmp_allocate_team_arrays(team, max_nproc);
5260
5261 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5262 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5263
5264 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5265 "%p to NULL\n",
5266 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5267 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5268 // memory, no need to duplicate
5269 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5270 // memory, no need to duplicate
5271
5272 if (__kmp_storage_map) {
5273 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5274 }
5275
5276 /* allocate space for arguments */
5277 __kmp_alloc_argv_entries(argc, team, FALSE);
5278 team->t.t_argc = argc;
5279
5280 KA_TRACE(20,
5281 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5282 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5283 { // Initialize barrier data.
5284 int b;
5285 for (b = 0; b < bs_last_barrier; ++b) {
5286 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5287#if USE_DEBUGGER
5288 team->t.t_bar[b].b_master_arrived = 0;
5289 team->t.t_bar[b].b_team_arrived = 0;
5290#endif
5291 }
5292 }
5293
5294#if OMP_40_ENABLED
5295 team->t.t_proc_bind = new_proc_bind;
5296#endif
5297
5298#if OMPT_SUPPORT
5299 __ompt_team_assign_id(team, ompt_parallel_id);
5300 team->t.ompt_serialized_team_info = NULL;
5301#endif
5302
5303 KMP_MB();
5304
5305 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5306 team->t.t_id));
5307
5308 return team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005309}
5310
5311/* TODO implement hot-teams at all levels */
5312/* TODO implement lazy thread release on demand (disband request) */
5313
5314/* free the team. return it to the team pool. release all the threads
5315 * associated with it */
Jonathan Peyton30419822017-05-12 18:01:32 +00005316void __kmp_free_team(kmp_root_t *root,
5317 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5318 int f;
5319 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5320 team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005321
Jonathan Peyton30419822017-05-12 18:01:32 +00005322 /* verify state */
5323 KMP_DEBUG_ASSERT(root);
5324 KMP_DEBUG_ASSERT(team);
5325 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5326 KMP_DEBUG_ASSERT(team->t.t_threads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005327
Jonathan Peyton30419822017-05-12 18:01:32 +00005328 int use_hot_team = team == root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005329#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005330 int level;
5331 kmp_hot_team_ptr_t *hot_teams;
5332 if (master) {
5333 level = team->t.t_active_level - 1;
5334 if (master->th.th_teams_microtask) { // in teams construct?
5335 if (master->th.th_teams_size.nteams > 1) {
5336 ++level; // level was not increased in teams construct for
5337 // team_of_masters
5338 }
5339 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5340 master->th.th_teams_level == team->t.t_level) {
5341 ++level; // level was not increased in teams construct for
5342 // team_of_workers before the parallel
5343 } // team->t.t_level will be increased inside parallel
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005344 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005345 hot_teams = master->th.th_hot_teams;
5346 if (level < __kmp_hot_teams_max_level) {
5347 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5348 use_hot_team = 1;
5349 }
5350 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005351#endif // KMP_NESTED_HOT_TEAMS
5352
Jonathan Peyton30419822017-05-12 18:01:32 +00005353 /* team is done working */
5354 TCW_SYNC_PTR(team->t.t_pkfn,
5355 NULL); // Important for Debugging Support Library.
5356 team->t.t_copyin_counter = 0; // init counter for possible reuse
5357 // Do not reset pointer to parent team to NULL for hot teams.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005358
Jonathan Peyton30419822017-05-12 18:01:32 +00005359 /* if we are non-hot team, release our threads */
5360 if (!use_hot_team) {
5361 if (__kmp_tasking_mode != tskm_immediate_exec) {
5362 // Wait for threads to reach reapable state
5363 for (f = 1; f < team->t.t_nproc; ++f) {
5364 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5365 kmp_info_t *th = team->t.t_threads[f];
5366 volatile kmp_uint32 *state = &th->th.th_reap_state;
5367 while (*state != KMP_SAFE_TO_REAP) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005368#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005369 // On Windows a thread can be killed at any time, check this
5370 DWORD ecode;
5371 if (!__kmp_is_thread_alive(th, &ecode)) {
5372 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5373 break;
5374 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005375#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005376 // first check if thread is sleeping
5377 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5378 if (fl.is_sleeping())
5379 fl.resume(__kmp_gtid_from_thread(th));
5380 KMP_CPU_PAUSE();
5381 }
5382 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005383
Jonathan Peyton30419822017-05-12 18:01:32 +00005384 // Delete task teams
5385 int tt_idx;
5386 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5387 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5388 if (task_team != NULL) {
5389 for (f = 0; f < team->t.t_nproc;
5390 ++f) { // Have all threads unref task teams
5391 team->t.t_threads[f]->th.th_task_team = NULL;
5392 }
5393 KA_TRACE(
5394 20,
5395 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5396 __kmp_get_gtid(), task_team, team->t.t_id));
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005397#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005398 __kmp_free_task_team(master, task_team);
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005399#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005400 team->t.t_task_team[tt_idx] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005401 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005402 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005403 }
5404
Jonathan Peyton30419822017-05-12 18:01:32 +00005405 // Reset pointer to parent team only for non-hot teams.
5406 team->t.t_parent = NULL;
5407 team->t.t_level = 0;
5408 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005409
Jonathan Peyton30419822017-05-12 18:01:32 +00005410 /* free the worker threads */
5411 for (f = 1; f < team->t.t_nproc; ++f) {
5412 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5413 __kmp_free_thread(team->t.t_threads[f]);
5414 team->t.t_threads[f] = NULL;
5415 }
5416
5417 /* put the team back in the team pool */
5418 /* TODO limit size of team pool, call reap_team if pool too large */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005419 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005420 __kmp_team_pool = (volatile kmp_team_t *)team;
5421 }
5422
5423 KMP_MB();
5424}
Jim Cownie5e8470a2013-09-27 10:38:44 +00005425
5426/* reap the team. destroy it, reclaim all its resources and free its memory */
Jonathan Peyton30419822017-05-12 18:01:32 +00005427kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5428 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005429
Jonathan Peyton30419822017-05-12 18:01:32 +00005430 KMP_DEBUG_ASSERT(team);
5431 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5432 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5433 KMP_DEBUG_ASSERT(team->t.t_threads);
5434 KMP_DEBUG_ASSERT(team->t.t_argv);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005435
Jonathan Peyton30419822017-05-12 18:01:32 +00005436 /* TODO clean the threads that are a part of this? */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005437
Jonathan Peyton30419822017-05-12 18:01:32 +00005438 /* free stuff */
5439 __kmp_free_team_arrays(team);
5440 if (team->t.t_argv != &team->t.t_inline_argv[0])
5441 __kmp_free((void *)team->t.t_argv);
5442 __kmp_free(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005443
Jonathan Peyton30419822017-05-12 18:01:32 +00005444 KMP_MB();
5445 return next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005446}
5447
Jim Cownie5e8470a2013-09-27 10:38:44 +00005448// Free the thread. Don't reap it, just place it on the pool of available
5449// threads.
5450//
5451// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5452// binding for the affinity mechanism to be useful.
5453//
5454// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5455// However, we want to avoid a potential performance problem by always
5456// scanning through the list to find the correct point at which to insert
5457// the thread (potential N**2 behavior). To do this we keep track of the
5458// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5459// With single-level parallelism, threads will always be added to the tail
5460// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5461// parallelism, all bets are off and we may need to scan through the entire
5462// free list.
5463//
5464// This change also has a potentially large performance benefit, for some
5465// applications. Previously, as threads were freed from the hot team, they
5466// would be placed back on the free list in inverse order. If the hot team
5467// grew back to its original size, then the freed threads would be placed
5468// back on the hot team in reverse order. This could cause bad cache
5469// locality problems on programs where the size of the hot team regularly
5470// grew and shrunk.
5471//
5472// Now, for single-level parallelism, the OMP tid is always == gtid.
Jonathan Peyton30419822017-05-12 18:01:32 +00005473void __kmp_free_thread(kmp_info_t *this_th) {
5474 int gtid;
5475 kmp_info_t **scan;
Jonathan Peytonf4392462017-07-27 20:58:41 +00005476 kmp_root_t *root = this_th->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005477
Jonathan Peyton30419822017-05-12 18:01:32 +00005478 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5479 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005480
Jonathan Peyton30419822017-05-12 18:01:32 +00005481 KMP_DEBUG_ASSERT(this_th);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005482
Jonathan Peyton30419822017-05-12 18:01:32 +00005483 // When moving a thread to the pool, switch it to wait on its own b_go flag,
5484 // and leave it with an uninitialized (NULL) team.
5485 int b;
5486 kmp_balign_t *balign = this_th->th.th_bar;
5487 for (b = 0; b < bs_last_barrier; ++b) {
5488 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5489 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5490 balign[b].bb.team = NULL;
5491 balign[b].bb.leaf_kids = 0;
5492 }
5493 this_th->th.th_task_state = 0;
5494
5495 /* put thread back on the free pool */
5496 TCW_PTR(this_th->th.th_team, NULL);
5497 TCW_PTR(this_th->th.th_root, NULL);
5498 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5499
5500 // If the __kmp_thread_pool_insert_pt is already past the new insert
5501 // point, then we need to re-scan the entire list.
5502 gtid = this_th->th.th_info.ds.ds_gtid;
5503 if (__kmp_thread_pool_insert_pt != NULL) {
5504 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5505 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5506 __kmp_thread_pool_insert_pt = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005507 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005508 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005509
Jonathan Peyton30419822017-05-12 18:01:32 +00005510 // Scan down the list to find the place to insert the thread.
5511 // scan is the address of a link in the list, possibly the address of
5512 // __kmp_thread_pool itself.
5513 //
5514 // In the absence of nested parallelism, the for loop will have 0 iterations.
5515 if (__kmp_thread_pool_insert_pt != NULL) {
5516 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5517 } else {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005518 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005519 }
5520 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5521 scan = &((*scan)->th.th_next_pool))
5522 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005523
Jonathan Peyton30419822017-05-12 18:01:32 +00005524 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5525 // to its address.
5526 TCW_PTR(this_th->th.th_next_pool, *scan);
5527 __kmp_thread_pool_insert_pt = *scan = this_th;
5528 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5529 (this_th->th.th_info.ds.ds_gtid <
5530 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5531 TCW_4(this_th->th.th_in_pool, TRUE);
5532 __kmp_thread_pool_nth++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005533
Jonathan Peyton30419822017-05-12 18:01:32 +00005534 TCW_4(__kmp_nth, __kmp_nth - 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00005535 root->r.r_cg_nthreads--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005536
5537#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005538 /* Adjust blocktime back to user setting or default if necessary */
5539 /* Middle initialization might never have occurred */
5540 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5541 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5542 if (__kmp_nth <= __kmp_avail_proc) {
5543 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005544 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005545 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005546#endif /* KMP_ADJUST_BLOCKTIME */
5547
Jonathan Peyton30419822017-05-12 18:01:32 +00005548 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005549}
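/* A standalone model of the sorted-insert scheme described before
   __kmp_free_thread (illustrative sketch only; node, pool and insert_pt are
   hypothetical stand-ins for kmp_info_t, __kmp_thread_pool and
   __kmp_thread_pool_insert_pt, and the TCW_* write macros are omitted):

     struct node { int gtid; node *next; };
     static node *pool = NULL;      // free list kept sorted by gtid
     static node *insert_pt = NULL; // last insertion point

     static void pool_insert(node *n) {
       // Reuse the cached point when it is not already past n->gtid;
       // otherwise rescan from the head of the list.
       node **scan = (insert_pt != NULL && insert_pt->gtid <= n->gtid)
                         ? &insert_pt->next
                         : &pool;
       while (*scan != NULL && (*scan)->gtid < n->gtid)
         scan = &(*scan)->next;
       n->next = *scan;
       insert_pt = *scan = n; // remember where we inserted
     }

   With single-level parallelism the gtids arrive in increasing order, so the
   scan above runs zero iterations and each insertion is O(1). */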
5550
Jim Cownie5e8470a2013-09-27 10:38:44 +00005551/* ------------------------------------------------------------------------ */
5552
Jonathan Peyton30419822017-05-12 18:01:32 +00005553void *__kmp_launch_thread(kmp_info_t *this_thr) {
5554 int gtid = this_thr->th.th_info.ds.ds_gtid;
5555 /* void *stack_data;*/
5556 kmp_team_t *(*volatile pteam);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005557
Jonathan Peyton30419822017-05-12 18:01:32 +00005558 KMP_MB();
5559 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005560
Jonathan Peyton30419822017-05-12 18:01:32 +00005561 if (__kmp_env_consistency_check) {
5562 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5563 }
5564
5565#if OMPT_SUPPORT
5566 if (ompt_enabled) {
5567 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5568 this_thr->th.ompt_thread_info.wait_id = 0;
5569 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5570 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5571 __ompt_thread_begin(ompt_thread_worker, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005572 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005573 }
5574#endif
5575
5576 /* This is the place where threads wait for work */
5577 while (!TCR_4(__kmp_global.g.g_done)) {
5578 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5579 KMP_MB();
5580
5581 /* wait for work to do */
5582 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005583
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005584#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005585 if (ompt_enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005586 this_thr->th.ompt_thread_info.state = ompt_state_idle;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005587 }
5588#endif
5589
Jonathan Peyton30419822017-05-12 18:01:32 +00005590 /* No tid yet since not part of a team */
5591 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5592
5593#if OMPT_SUPPORT
5594 if (ompt_enabled) {
5595 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5596 }
5597#endif
5598
5599 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5600
5601 /* have we been allocated? */
5602 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5603#if OMPT_SUPPORT
5604 ompt_task_info_t *task_info;
5605 ompt_parallel_id_t my_parallel_id;
5606 if (ompt_enabled) {
5607 task_info = __ompt_get_taskinfo(0);
5608 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
5609 }
5610#endif
5611 /* we were just woken up, so run our new task */
5612 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5613 int rc;
5614 KA_TRACE(20,
5615 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5616 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5617 (*pteam)->t.t_pkfn));
5618
5619 updateHWFPControl(*pteam);
5620
5621#if OMPT_SUPPORT
5622 if (ompt_enabled) {
5623 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5624 // Initialize OMPT task id for implicit task.
5625 int tid = __kmp_tid_from_gtid(gtid);
5626 task_info->task_id = __ompt_task_id_new(tid);
5627 }
5628#endif
5629
5630 {
5631 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5632 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5633 rc = (*pteam)->t.t_invoke(gtid);
5634 }
5635 KMP_ASSERT(rc);
5636
5637#if OMPT_SUPPORT
5638 if (ompt_enabled) {
5639 /* no frame set while outside task */
5640 task_info->frame.exit_runtime_frame = NULL;
5641
5642 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5643 }
5644#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005645 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00005646 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5647 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5648 (*pteam)->t.t_pkfn));
5649 }
5650 /* join barrier after parallel region */
5651 __kmp_join_barrier(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005652#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00005653 if (ompt_enabled) {
5654 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
5655 // don't access *pteam here: it may have already been freed
5656 // by the master thread behind the barrier (possible race)
5657 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5658 my_parallel_id, task_info->task_id);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005659 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005660 task_info->frame.exit_runtime_frame = NULL;
5661 task_info->task_id = 0;
5662 }
5663#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005664 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005665 }
5666 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005667
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005668#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00005669 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5670 __ompt_thread_end(ompt_thread_worker, gtid);
5671 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005672#endif
5673
Jonathan Peyton30419822017-05-12 18:01:32 +00005674 this_thr->th.th_task_team = NULL;
5675 /* run the destructors for the threadprivate data for this thread */
5676 __kmp_common_destroy_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005677
Jonathan Peyton30419822017-05-12 18:01:32 +00005678 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5679 KMP_MB();
5680 return this_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005681}
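/* Reading aid: the worker loop above, reduced to its skeleton (trace,
   consistency-check and OMPT code stripped; this is not alternative
   behavior):

     while (!TCR_4(__kmp_global.g.g_done)) {
       __kmp_fork_barrier(gtid, KMP_GTID_DNE);  // sleep until given a team
       if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
         if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL)
           (*pteam)->t.t_invoke(gtid);          // run the microtask
         __kmp_join_barrier(gtid);              // rejoin after the region
       }
     }
*/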
5682
5683/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005684
Jonathan Peyton30419822017-05-12 18:01:32 +00005685void __kmp_internal_end_dest(void *specific_gtid) {
5686#if KMP_COMPILER_ICC
5687#pragma warning(push)
5688#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5689// significant bits
5690#endif
5691 // Make sure no significant bits are lost
5692 int gtid = (kmp_intptr_t)specific_gtid - 1;
5693#if KMP_COMPILER_ICC
5694#pragma warning(pop)
5695#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005696
Jonathan Peyton30419822017-05-12 18:01:32 +00005697 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5698 /* NOTE: the gtid is stored as gtid+1 in the thread-local storage;
5699 * this is because 0 is reserved for the nothing-stored case */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005700
Jonathan Peyton30419822017-05-12 18:01:32 +00005701 /* josh: One reason for setting the gtid specific data even when it is being
5702 destroyed by pthread is to allow gtid lookup through thread specific data
5703 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5704 that gets executed in the call to __kmp_internal_end_thread, actually
5705 gets the gtid through the thread specific data. Setting it here seems
5706 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5707 to run smoothly.
5708 todo: get rid of this after we remove the dependence on
5709 __kmp_gtid_get_specific */
5710 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5711 __kmp_gtid_set_specific(gtid);
5712#ifdef KMP_TDATA_GTID
5713 __kmp_gtid = gtid;
5714#endif
5715 __kmp_internal_end_thread(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005716}
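/* The gtid+1 encoding in isolation (sketch; the real code goes through
   __kmp_gtid_set_specific / __kmp_gtid_get_specific rather than calling
   pthread directly, and key here is a hypothetical TLS key):

     // store: shift by one so that gtid 0 is distinguishable from the
     // NULL value returned for a key that was never set
     pthread_setspecific(key, (void *)(intptr_t)(gtid + 1));

     // load: 0 means no gtid has been stored, otherwise undo the shift
     int v = (int)(intptr_t)pthread_getspecific(key);
     int gtid = (v == 0) ? -1 : v - 1; // -1 standing in for "no gtid"
*/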
5717
Jonathan Peyton99016992015-05-26 17:32:53 +00005718#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005719
Jonathan Peyton30419822017-05-12 18:01:32 +00005720// 2009-09-08 (lev): It looks like the destructor does not work. In simple test
5721// cases destructors work perfectly, but in real libomp.so I have no evidence it
5722// is ever called. However, the -fini linker option in makefile.mk works fine.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005723
Jonathan Peyton30419822017-05-12 18:01:32 +00005724__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5725 __kmp_internal_end_atexit();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005726}
5727
Jonathan Peyton30419822017-05-12 18:01:32 +00005728void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005729
5730#endif
5731
Jonathan Peyton30419822017-05-12 18:01:32 +00005732/* [Windows] josh: when the atexit handler is called, there may still be more
5733 than one thread alive */
5734void __kmp_internal_end_atexit(void) {
5735 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5736 /* [Windows]
5737 josh: ideally, we want to completely shutdown the library in this atexit
5738 handler, but stat code that depends on thread specific data for gtid fails
5739 because that data becomes unavailable at some point during the shutdown, so
5740 we call __kmp_internal_end_thread instead. We should eventually remove the
5741 dependency on __kmp_get_specific_gtid in the stat code and use
5742 __kmp_internal_end_library to cleanly shutdown the library.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005743
Jonathan Peyton30419822017-05-12 18:01:32 +00005744 // TODO: Can some of this comment about GVS be removed?
5745 I suspect that the offending stat code is executed when the calling thread
5746 tries to clean up a dead root thread's data structures, resulting in GVS
5747 code trying to close the GVS structures for that thread, but since the stat
5748 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
5749 the calling thread is cleaning up itself instead of another thread, it gets
5750 confused. This happens because allowing a thread to unregister and cleanup
5751 another thread is a recent modification for addressing an issue.
5752 Based on the current design (20050722), a thread may end up
5753 trying to unregister another thread only if thread death does not trigger
5754 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
5755 thread specific data destructor function to detect thread death. For
5756 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
5757 is nothing. Thus, the workaround is applicable only for Windows static
5758 stat library. */
5759 __kmp_internal_end_library(-1);
5760#if KMP_OS_WINDOWS
5761 __kmp_close_console();
5762#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005763}
5764
Jonathan Peyton30419822017-05-12 18:01:32 +00005765static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5766 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005767
Jonathan Peyton30419822017-05-12 18:01:32 +00005768 int gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005769
Jonathan Peyton30419822017-05-12 18:01:32 +00005770 KMP_DEBUG_ASSERT(thread != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005771
Jonathan Peyton30419822017-05-12 18:01:32 +00005772 gtid = thread->th.th_info.ds.ds_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005773
Jonathan Peyton30419822017-05-12 18:01:32 +00005774 if (!is_root) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005775
Jonathan Peyton30419822017-05-12 18:01:32 +00005776 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5777 /* Assume the threads are at the fork barrier here */
5778 KA_TRACE(
5779 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5780 gtid));
5781 /* Need release fence here to prevent seg faults for tree forkjoin barrier
5782 * (GEH) */
5783 ANNOTATE_HAPPENS_BEFORE(thread);
5784 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5785 __kmp_release_64(&flag);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005786 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005787
Jonathan Peyton30419822017-05-12 18:01:32 +00005788 // Terminate OS thread.
5789 __kmp_reap_worker(thread);
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005790
Jonathan Peyton30419822017-05-12 18:01:32 +00005791 // The thread was killed asynchronously. If it was actively
5792 // spinning in the thread pool, decrement the global count.
5793 //
5794 // There is a small timing hole here - if the worker thread was just waking
5795 // up after sleeping in the pool, had reset its th_active_in_pool flag but
5796 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
5797 // the global counter might not get updated.
5798 //
5799 // Currently, this can only happen as the library is unloaded,
5800 // so there are no harmful side effects.
5801 if (thread->th.th_active_in_pool) {
5802 thread->th.th_active_in_pool = FALSE;
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00005803 KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
Jonathan Peyton30419822017-05-12 18:01:32 +00005804 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
5805 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005806
Jonathan Peyton30419822017-05-12 18:01:32 +00005807 // Decrement # of [worker] threads in the pool.
5808 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5809 --__kmp_thread_pool_nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005810 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005811
Jonathan Peyton30419822017-05-12 18:01:32 +00005812 __kmp_free_implicit_task(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005813
Jonathan Peyton30419822017-05-12 18:01:32 +00005814// Free the fast memory for tasking
5815#if USE_FAST_MEMORY
5816 __kmp_free_fast_memory(thread);
5817#endif /* USE_FAST_MEMORY */
5818
5819 __kmp_suspend_uninitialize_thread(thread);
5820
5821 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5822 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5823
5824 --__kmp_all_nth;
5825// __kmp_nth was decremented when the thread was added to the pool.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005826
5827#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005828 /* Adjust blocktime back to user setting or default if necessary */
5829 /* Middle initialization might never have occurred */
5830 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5831 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5832 if (__kmp_nth <= __kmp_avail_proc) {
5833 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005834 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005835 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005836#endif /* KMP_ADJUST_BLOCKTIME */
5837
Jonathan Peyton30419822017-05-12 18:01:32 +00005838 /* free the memory being used */
5839 if (__kmp_env_consistency_check) {
5840 if (thread->th.th_cons) {
5841 __kmp_free_cons_stack(thread->th.th_cons);
5842 thread->th.th_cons = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005843 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005844 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005845
Jonathan Peyton30419822017-05-12 18:01:32 +00005846 if (thread->th.th_pri_common != NULL) {
5847 __kmp_free(thread->th.th_pri_common);
5848 thread->th.th_pri_common = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005849 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005850
Jonathan Peyton30419822017-05-12 18:01:32 +00005851 if (thread->th.th_task_state_memo_stack != NULL) {
5852 __kmp_free(thread->th.th_task_state_memo_stack);
5853 thread->th.th_task_state_memo_stack = NULL;
5854 }
5855
5856#if KMP_USE_BGET
5857 if (thread->th.th_local.bget_data != NULL) {
5858 __kmp_finalize_bget(thread);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005859 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005860#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005861
Alp Toker98758b02014-03-02 04:12:06 +00005862#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005863 if (thread->th.th_affin_mask != NULL) {
5864 KMP_CPU_FREE(thread->th.th_affin_mask);
5865 thread->th.th_affin_mask = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005866 }
Alp Toker98758b02014-03-02 04:12:06 +00005867#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005868
Jonathan Peyton30419822017-05-12 18:01:32 +00005869 __kmp_reap_team(thread->th.th_serial_team);
5870 thread->th.th_serial_team = NULL;
5871 __kmp_free(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005872
Jonathan Peyton30419822017-05-12 18:01:32 +00005873 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005874
5875} // __kmp_reap_thread
5876
Jonathan Peyton30419822017-05-12 18:01:32 +00005877static void __kmp_internal_end(void) {
5878 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005879
Jonathan Peyton30419822017-05-12 18:01:32 +00005880 /* First, unregister the library */
5881 __kmp_unregister_library();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005882
Jonathan Peyton30419822017-05-12 18:01:32 +00005883#if KMP_OS_WINDOWS
5884 /* In Win static library, we can't tell when a root actually dies, so we
5885 reclaim the data structures for any root threads that have died but not
5886 unregistered themselves, in order to shut down cleanly.
5887 In Win dynamic library we also can't tell when a thread dies. */
5888 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
5889// dead roots
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005890#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005891
Jonathan Peyton30419822017-05-12 18:01:32 +00005892 for (i = 0; i < __kmp_threads_capacity; i++)
5893 if (__kmp_root[i])
5894 if (__kmp_root[i]->r.r_active)
5895 break;
5896 KMP_MB(); /* Flush all pending memory write invalidates. */
5897 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5898
5899 if (i < __kmp_threads_capacity) {
5900#if KMP_USE_MONITOR
5901 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5902 KMP_MB(); /* Flush all pending memory write invalidates. */
5903
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005904 // Need to check that monitor was initialized before reaping it. If we are
5905 // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
5906 // __kmp_monitor will appear to contain valid data, but it is only valid in
5907 // the parent process, not the child.
Jonathan Peyton30419822017-05-12 18:01:32 +00005908 // New behavior (201008): instead of keying off of the flag
5909 // __kmp_init_parallel, the monitor thread creation is keyed off
5910 // of the new flag __kmp_init_monitor.
5911 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5912 if (TCR_4(__kmp_init_monitor)) {
5913 __kmp_reap_monitor(&__kmp_monitor);
5914 TCW_4(__kmp_init_monitor, 0);
5915 }
5916 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5917 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5918#endif // KMP_USE_MONITOR
5919 } else {
5920/* TODO move this to cleanup code */
5921#ifdef KMP_DEBUG
5922 /* make sure that everything has properly ended */
5923 for (i = 0; i < __kmp_threads_capacity; i++) {
5924 if (__kmp_root[i]) {
5925 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
5926 // there can be uber threads alive here
5927 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
5928 }
5929 }
5930#endif
5931
5932 KMP_MB();
5933
5934 // Reap the worker threads.
5935 // This is valid for now, but be careful if threads are reaped sooner.
5936 while (__kmp_thread_pool != NULL) { // Loop thru all the threads in the pool.
5937 // Get the next thread from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005938 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005939 __kmp_thread_pool = thread->th.th_next_pool;
5940 // Reap it.
5941 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5942 thread->th.th_next_pool = NULL;
5943 thread->th.th_in_pool = FALSE;
5944 __kmp_reap_thread(thread, 0);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005945 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005946 __kmp_thread_pool_insert_pt = NULL;
5947
5948 // Reap teams.
5949 while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
5950 // Get the next team from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005951 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005952 __kmp_team_pool = team->t.t_next_pool;
5953 // Reap it.
5954 team->t.t_next_pool = NULL;
5955 __kmp_reap_team(team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005956 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005957
5958 __kmp_reap_task_teams();
5959
5960 for (i = 0; i < __kmp_threads_capacity; ++i) {
5961 // TBD: Add some checking...
5962 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5963 }
5964
5965 /* Make sure all threadprivate destructors get run by joining with all
5966 worker threads before resetting this flag */
5967 TCW_SYNC_4(__kmp_init_common, FALSE);
5968
5969 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
5970 KMP_MB();
5971
5972#if KMP_USE_MONITOR
5973 // See note above: One of the possible fixes for CQ138434 / CQ140126
5974 //
5975 // FIXME: push both code fragments down and CSE them?
5976 // push them into __kmp_cleanup() ?
5977 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5978 if (TCR_4(__kmp_init_monitor)) {
5979 __kmp_reap_monitor(&__kmp_monitor);
5980 TCW_4(__kmp_init_monitor, 0);
5981 }
5982 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5983 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5984#endif
5985 } /* else !__kmp_global.t_active */
5986 TCW_4(__kmp_init_gtid, FALSE);
5987 KMP_MB(); /* Flush all pending memory write invalidates. */
5988
5989 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005990#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00005991 ompt_fini();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005992#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005993}
5994
Jonathan Peyton30419822017-05-12 18:01:32 +00005995void __kmp_internal_end_library(int gtid_req) {
5996 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5997 /* this shouldn't be a race condition because __kmp_internal_end() is the
5998 only place to clear __kmp_serial_init */
5999 /* we'll check this later too, after we get the lock */
6000 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6001 // redundant, because the next check will work in any case.
6002 if (__kmp_global.g.g_abort) {
6003 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6004 /* TODO abort? */
6005 return;
6006 }
6007 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6008 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6009 return;
6010 }
6011
6012 KMP_MB(); /* Flush all pending memory write invalidates. */
6013
6014 /* find out who we are and what we should do */
6015 {
6016 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6017 KA_TRACE(
6018 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6019 if (gtid == KMP_GTID_SHUTDOWN) {
6020 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6021 "already shutdown\n"));
6022 return;
6023 } else if (gtid == KMP_GTID_MONITOR) {
6024 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6025 "registered, or system shutdown\n"));
6026 return;
6027 } else if (gtid == KMP_GTID_DNE) {
6028 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6029 "shutdown\n"));
6030 /* we don't know who we are, but we may still shutdown the library */
6031 } else if (KMP_UBER_GTID(gtid)) {
6032 /* unregister ourselves as an uber thread. gtid is no longer valid */
6033 if (__kmp_root[gtid]->r.r_active) {
6034 __kmp_global.g.g_abort = -1;
6035 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6036 KA_TRACE(10,
6037 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6038 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006039 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006040 } else {
6041 KA_TRACE(
6042 10,
6043 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6044 __kmp_unregister_root_current_thread(gtid);
6045 }
6046 } else {
6047/* worker threads may call this function through the atexit handler, if they
6048 * call exit() */
6049/* For now, skip the usual subsequent processing and just dump the debug buffer.
6050 TODO: do a thorough shutdown instead */
6051#ifdef DUMP_DEBUG_ON_EXIT
6052 if (__kmp_debug_buf)
6053 __kmp_dump_debug_buffer();
6054#endif
6055 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006056 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006057 }
6058 /* synchronize the termination process */
6059 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006060
Jonathan Peyton30419822017-05-12 18:01:32 +00006061 /* have we already finished */
6062 if (__kmp_global.g.g_abort) {
6063 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6064 /* TODO abort? */
6065 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6066 return;
6067 }
6068 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6069 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6070 return;
6071 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006072
Jonathan Peyton30419822017-05-12 18:01:32 +00006073 /* We need this lock to enforce mutex between this reading of
6074 __kmp_threads_capacity and the writing by __kmp_register_root.
6075 Alternatively, we can use a counter of roots that is atomically updated by
6076 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6077 __kmp_internal_end_*. */
6078 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006079
Jonathan Peyton30419822017-05-12 18:01:32 +00006080 /* now we can safely conduct the actual termination */
6081 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006082
Jonathan Peyton30419822017-05-12 18:01:32 +00006083 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6084 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006085
Jonathan Peyton30419822017-05-12 18:01:32 +00006086 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006087
Jonathan Peyton30419822017-05-12 18:01:32 +00006088#ifdef DUMP_DEBUG_ON_EXIT
6089 if (__kmp_debug_buf)
6090 __kmp_dump_debug_buffer();
6091#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006092
Jonathan Peyton30419822017-05-12 18:01:32 +00006093#if KMP_OS_WINDOWS
6094 __kmp_close_console();
6095#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006096
Jonathan Peyton30419822017-05-12 18:01:32 +00006097 __kmp_fini_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006098
6099} // __kmp_internal_end_library
6100
Jonathan Peyton30419822017-05-12 18:01:32 +00006101void __kmp_internal_end_thread(int gtid_req) {
6102 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006103
Jonathan Peyton30419822017-05-12 18:01:32 +00006104 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6105 /* this shouldn't be a race condition because __kmp_internal_end() is the
6106 * only place to clear __kmp_serial_init */
6107 /* we'll check this later too, after we get the lock */
6108 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6109 // redundant, because the next check will work in any case.
6110 if (__kmp_global.g.g_abort) {
6111 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6112 /* TODO abort? */
6113 return;
6114 }
6115 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6116 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6117 return;
6118 }
6119
6120 KMP_MB(); /* Flush all pending memory write invalidates. */
6121
6122 /* find out who we are and what we should do */
6123 {
6124 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6125 KA_TRACE(10,
6126 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6127 if (gtid == KMP_GTID_SHUTDOWN) {
6128 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6129 "already shutdown\n"));
6130 return;
6131 } else if (gtid == KMP_GTID_MONITOR) {
6132 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6133 "registered, or system shutdown\n"));
6134 return;
6135 } else if (gtid == KMP_GTID_DNE) {
6136 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6137 "shutdown\n"));
6138 return;
6139 /* we don't know who we are */
6140 } else if (KMP_UBER_GTID(gtid)) {
6141 /* unregister ourselves as an uber thread. gtid is no longer valid */
6142 if (__kmp_root[gtid]->r.r_active) {
6143 __kmp_global.g.g_abort = -1;
6144 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6145 KA_TRACE(10,
6146 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6147 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006148 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006149 } else {
6150 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6151 gtid));
6152 __kmp_unregister_root_current_thread(gtid);
6153 }
6154 } else {
6155 /* just a worker thread, let's leave */
6156 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6157
6158 if (gtid >= 0) {
6159 __kmp_threads[gtid]->th.th_task_team = NULL;
6160 }
6161
6162 KA_TRACE(10,
6163 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6164 gtid));
6165 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006166 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006167 }
6168#if defined KMP_DYNAMIC_LIB
6169 // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber
6170 // thread; it is better to shut down later, in the library destructor.
6171 // The reason for this change is a performance problem when a non-OpenMP thread
6172 // in a loop forks and joins many OpenMP threads. We can save a lot of time by
6173 // keeping worker threads alive until the program shuts down.
6174 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
6175 // and Windows(DPD200287443) that occurs when using critical sections from
6176 // foreign threads.
6177 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6178 return;
6179#endif
6180 /* synchronize the termination process */
6181 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006182
Jonathan Peyton30419822017-05-12 18:01:32 +00006183 /* have we already finished */
6184 if (__kmp_global.g.g_abort) {
6185 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6186 /* TODO abort? */
6187 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6188 return;
6189 }
6190 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6191 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6192 return;
6193 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006194
Jonathan Peyton30419822017-05-12 18:01:32 +00006195 /* We need this lock to enforce mutex between this reading of
6196 __kmp_threads_capacity and the writing by __kmp_register_root.
6197 Alternatively, we can use a counter of roots that is atomically updated by
6198 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6199 __kmp_internal_end_*. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006200
Jonathan Peyton30419822017-05-12 18:01:32 +00006201 /* should we finish the run-time? are all siblings done? */
6202 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006203
Jonathan Peyton30419822017-05-12 18:01:32 +00006204 for (i = 0; i < __kmp_threads_capacity; ++i) {
6205 if (KMP_UBER_GTID(i)) {
6206 KA_TRACE(
6207 10,
6208 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6209 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6210 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6211 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006212 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006213 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006214
Jonathan Peyton30419822017-05-12 18:01:32 +00006215 /* now we can safely conduct the actual termination */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006216
Jonathan Peyton30419822017-05-12 18:01:32 +00006217 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006218
Jonathan Peyton30419822017-05-12 18:01:32 +00006219 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6220 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006221
Jonathan Peyton30419822017-05-12 18:01:32 +00006222 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006223
Jonathan Peyton30419822017-05-12 18:01:32 +00006224#ifdef DUMP_DEBUG_ON_EXIT
6225 if (__kmp_debug_buf)
6226 __kmp_dump_debug_buffer();
6227#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006228} // __kmp_internal_end_thread
6229
Jonathan Peyton30419822017-05-12 18:01:32 +00006230// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006231// Library registration stuff.
6232
Jonathan Peyton30419822017-05-12 18:01:32 +00006233static long __kmp_registration_flag = 0;
6234// Random value used to indicate library initialization.
6235static char *__kmp_registration_str = NULL;
6236// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006237
Jonathan Peyton30419822017-05-12 18:01:32 +00006238static inline char *__kmp_reg_status_name() {
6239 /* On RHEL 3u5 if linked statically, getpid() returns different values in
6240 each thread. If registration and unregistration go in different threads
6241 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6242 env var cannot be found, because the name will contain a different pid. */
6243 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00006244} // __kmp_reg_status_name
6245
Jonathan Peyton30419822017-05-12 18:01:32 +00006246void __kmp_register_library_startup(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006247
Jonathan Peyton30419822017-05-12 18:01:32 +00006248 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6249 int done = 0;
6250 union {
6251 double dtime;
6252 long ltime;
6253 } time;
6254#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6255 __kmp_initialize_system_tick();
6256#endif
6257 __kmp_read_system_time(&time.dtime);
6258 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6259 __kmp_registration_str =
6260 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6261 __kmp_registration_flag, KMP_LIBRARY_FILE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006262
Jonathan Peyton30419822017-05-12 18:01:32 +00006263 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6264 __kmp_registration_str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006265
Jonathan Peyton30419822017-05-12 18:01:32 +00006266 while (!done) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006267
Jonathan Peyton30419822017-05-12 18:01:32 +00006268 char *value = NULL; // Actual value of the environment variable.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006269
Jonathan Peyton30419822017-05-12 18:01:32 +00006270 // Set the environment variable, but do not overwrite it if it already exists.
6271 __kmp_env_set(name, __kmp_registration_str, 0);
6272 // Check that the variable was written.
6273 value = __kmp_env_get(name);
6274 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006275
Jonathan Peyton30419822017-05-12 18:01:32 +00006276 done = 1; // Ok, environment variable set successfully, exit the loop.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006277
Jonathan Peyton30419822017-05-12 18:01:32 +00006278 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006279
Jonathan Peyton30419822017-05-12 18:01:32 +00006280 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6281 // Check whether it is alive or dead.
6282 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6283 char *tail = value;
6284 char *flag_addr_str = NULL;
6285 char *flag_val_str = NULL;
6286 char const *file_name = NULL;
6287 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6288 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6289 file_name = tail;
6290 if (tail != NULL) {
6291 long *flag_addr = 0;
6292 long flag_val = 0;
6293 KMP_SSCANF(flag_addr_str, "%p", &flag_addr);
6294 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6295 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6296 // First, check whether environment-encoded address is mapped into
6297 // addr space.
6298 // If so, dereference it to see if it still has the right value.
6299 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6300 neighbor = 1;
6301 } else {
6302 // If not, then we know the other copy of the library is no longer
6303 // running.
6304 neighbor = 2;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006305 }
6306 }
6307 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006308 switch (neighbor) {
6309 case 0: // Cannot parse environment variable -- neighbor status unknown.
6310 // Assume it is the incompatible format of a future version of the
6311 // library. Assume the other library is alive.
6312 // WARN( ... ); // TODO: Issue a warning.
6313 file_name = "unknown library";
6314 // Attention! Falling through to the next case. That's intentional.
6315 case 1: { // Neighbor is alive.
6316 // Check it is allowed.
6317 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6318 if (!__kmp_str_match_true(duplicate_ok)) {
6319 // That's not allowed. Issue fatal error.
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006320 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6321 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006322 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006323 KMP_INTERNAL_FREE(duplicate_ok);
6324 __kmp_duplicate_library_ok = 1;
6325 done = 1; // Exit the loop.
6326 } break;
6327 case 2: { // Neighbor is dead.
6328 // Clear the variable and try to register library again.
6329 __kmp_env_unset(name);
6330 } break;
6331 default: { KMP_DEBUG_ASSERT(0); } break;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006332 }
6333 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006334 KMP_INTERNAL_FREE((void *)value);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006335 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006336 KMP_INTERNAL_FREE((void *)name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006337
6338} // func __kmp_register_library_startup
6339
Jonathan Peyton30419822017-05-12 18:01:32 +00006340void __kmp_unregister_library(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006341
Jonathan Peyton30419822017-05-12 18:01:32 +00006342 char *name = __kmp_reg_status_name();
6343 char *value = __kmp_env_get(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006344
Jonathan Peyton30419822017-05-12 18:01:32 +00006345 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6346 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6347 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6348 // Ok, this is our variable. Delete it.
6349 __kmp_env_unset(name);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006350 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006351
Jonathan Peyton30419822017-05-12 18:01:32 +00006352 KMP_INTERNAL_FREE(__kmp_registration_str);
6353 KMP_INTERNAL_FREE(value);
6354 KMP_INTERNAL_FREE(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006355
Jonathan Peyton30419822017-05-12 18:01:32 +00006356 __kmp_registration_flag = 0;
6357 __kmp_registration_str = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006358
6359} // __kmp_unregister_library
6360
Jim Cownie5e8470a2013-09-27 10:38:44 +00006361// End of Library registration stuff.
Jonathan Peyton30419822017-05-12 18:01:32 +00006362// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006363
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006364#if KMP_MIC_SUPPORTED
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006365
Jonathan Peyton30419822017-05-12 18:01:32 +00006366static void __kmp_check_mic_type() {
6367 kmp_cpuid_t cpuid_state = {0};
6368 kmp_cpuid_t *cs_p = &cpuid_state;
6369 __kmp_x86_cpuid(1, 0, cs_p);
6370 // We don't support mic1 at the moment
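  // CPUID leaf 1 EAX packs stepping/model/family/extended-model bits; the
  // masks below are intended to select KNC (mic2: family 0x0B) and KNL
  // (mic3: family 6, model 0x57).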
6371 if ((cs_p->eax & 0xff0) == 0xB10) {
6372 __kmp_mic_type = mic2;
6373 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6374 __kmp_mic_type = mic3;
6375 } else {
6376 __kmp_mic_type = non_mic;
6377 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006378}
6379
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006380#endif /* KMP_MIC_SUPPORTED */
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006381
Jonathan Peyton30419822017-05-12 18:01:32 +00006382static void __kmp_do_serial_initialize(void) {
6383 int i, gtid;
6384 int size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006385
Jonathan Peyton30419822017-05-12 18:01:32 +00006386 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006387
Jonathan Peyton30419822017-05-12 18:01:32 +00006388 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6389 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6390 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6391 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6392 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006393
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006394#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006395 ompt_pre_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006396#endif
6397
Jonathan Peyton30419822017-05-12 18:01:32 +00006398 __kmp_validate_locks();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006399
Jonathan Peyton30419822017-05-12 18:01:32 +00006400 /* Initialize internal memory allocator */
6401 __kmp_init_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006402
Jonathan Peyton30419822017-05-12 18:01:32 +00006403 /* Register the library startup via an environment variable and check to see
6404 whether another copy of the library is already registered. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006405
Jonathan Peyton30419822017-05-12 18:01:32 +00006406 __kmp_register_library_startup();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006407
Jonathan Peyton30419822017-05-12 18:01:32 +00006408 /* TODO reinitialization of library */
6409 if (TCR_4(__kmp_global.g.g_done)) {
6410 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6411 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006412
Jonathan Peyton30419822017-05-12 18:01:32 +00006413 __kmp_global.g.g_abort = 0;
6414 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006415
Jonathan Peyton30419822017-05-12 18:01:32 +00006416/* initialize the locks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006417#if KMP_USE_ADAPTIVE_LOCKS
6418#if KMP_DEBUG_ADAPTIVE_LOCKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006419 __kmp_init_speculative_stats();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006420#endif
6421#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006422#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006423 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006424#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006425 __kmp_init_lock(&__kmp_global_lock);
6426 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6427 __kmp_init_lock(&__kmp_debug_lock);
6428 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6429 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6430 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6431 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6432 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6433 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6434 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6435 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6436 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6437 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6438 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6439 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6440 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6441 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6442 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006443#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006444 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006445#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006446 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006447
Jonathan Peyton30419822017-05-12 18:01:32 +00006448 /* conduct initialization and initial setup of configuration */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006449
Jonathan Peyton30419822017-05-12 18:01:32 +00006450 __kmp_runtime_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006451
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006452#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006453 __kmp_check_mic_type();
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006454#endif
6455
Jonathan Peyton30419822017-05-12 18:01:32 +00006456// Some global variable initialization moved here from kmp_env_initialize()
Jim Cownie5e8470a2013-09-27 10:38:44 +00006457#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006458 kmp_diag = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006459#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006460 __kmp_abort_delay = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006461
Jonathan Peyton30419822017-05-12 18:01:32 +00006462 // From __kmp_init_dflt_team_nth()
6463 /* assume the entire machine will be used */
6464 __kmp_dflt_team_nth_ub = __kmp_xproc;
6465 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6466 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6467 }
6468 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6469 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6470 }
6471 __kmp_max_nth = __kmp_sys_max_nth;
Jonathan Peytonf4392462017-07-27 20:58:41 +00006472 __kmp_cg_max_nth = __kmp_sys_max_nth;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00006473 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
6474 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6475 __kmp_teams_max_nth = __kmp_sys_max_nth;
6476 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006477
Jonathan Peyton30419822017-05-12 18:01:32 +00006478 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
6479 // part
6480 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006481#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006482 __kmp_monitor_wakeups =
6483 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6484 __kmp_bt_intervals =
6485 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006486#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006487 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6488 __kmp_library = library_throughput;
6489 // From KMP_SCHEDULE initialization
6490 __kmp_static = kmp_sch_static_balanced;
6491// AC: do not use analytical here, because it is non-monotonous
6492//__kmp_guided = kmp_sch_guided_iterative_chunked;
6493//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
6494// need to repeat assignment
6495// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
6496// bit control and barrier method control parts
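// Each barrier type gets a gather/release branching factor (branch bits,
// effectively 2^bits children per node of the barrier tree) and a
// gather/release pattern (e.g. bp_hyper_bar, bp_hierarchical_bar); the
// defaults set here can still be overridden by the barrier-related KMP_*
// settings processed later in __kmp_env_initialize().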
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006497#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton30419822017-05-12 18:01:32 +00006498#define kmp_reduction_barrier_gather_bb ((int)1)
6499#define kmp_reduction_barrier_release_bb ((int)1)
6500#define kmp_reduction_barrier_gather_pat bp_hyper_bar
6501#define kmp_reduction_barrier_release_pat bp_hyper_bar
6502#endif // KMP_FAST_REDUCTION_BARRIER
6503 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6504 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6505 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6506 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6507 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6508#if KMP_FAST_REDUCTION_BARRIER
6509 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
6510 // lin_64 ): hyper,1
6511 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6512 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6513 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6514 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006515 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006516#endif // KMP_FAST_REDUCTION_BARRIER
6517 }
6518#if KMP_FAST_REDUCTION_BARRIER
6519#undef kmp_reduction_barrier_release_pat
6520#undef kmp_reduction_barrier_gather_pat
6521#undef kmp_reduction_barrier_release_bb
6522#undef kmp_reduction_barrier_gather_bb
6523#endif // KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006524#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006525 if (__kmp_mic_type == mic2) { // KNC
6526 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
6527 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
6528 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6529 1; // forkjoin release
6530 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6531 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6532 }
6533#if KMP_FAST_REDUCTION_BARRIER
6534 if (__kmp_mic_type == mic2) { // KNC
6535 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6536 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6537 }
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006538#endif // KMP_FAST_REDUCTION_BARRIER
6539#endif // KMP_MIC_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006540
Jonathan Peyton30419822017-05-12 18:01:32 +00006541// From KMP_CHECKS initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00006542#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006543 __kmp_env_checks = TRUE; /* development versions have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006544#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006545 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006546#endif
6547
Jonathan Peyton30419822017-05-12 18:01:32 +00006548 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6549 __kmp_foreign_tp = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006550
Jonathan Peyton30419822017-05-12 18:01:32 +00006551 __kmp_global.g.g_dynamic = FALSE;
6552 __kmp_global.g.g_dynamic_mode = dynamic_default;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006553
Jonathan Peyton30419822017-05-12 18:01:32 +00006554 __kmp_env_initialize(NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006555
Jonathan Peyton30419822017-05-12 18:01:32 +00006556// Print all messages in message catalog for testing purposes.
6557#ifdef KMP_DEBUG
6558 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6559 if (__kmp_str_match_true(val)) {
6560 kmp_str_buf_t buffer;
6561 __kmp_str_buf_init(&buffer);
6562 __kmp_i18n_dump_catalog(&buffer);
6563 __kmp_printf("%s", buffer.str);
6564 __kmp_str_buf_free(&buffer);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006565 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006566 __kmp_env_free(&val);
6567#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006568
Jonathan Peyton30419822017-05-12 18:01:32 +00006569 __kmp_threads_capacity =
6570 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6571 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6572 __kmp_tp_capacity = __kmp_default_tp_capacity(
6573 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006574
Jonathan Peyton30419822017-05-12 18:01:32 +00006575 // If the library is shut down properly, both pools must be NULL. Just in
6576 // case, set them to NULL -- some memory may leak, but subsequent code will
6577 // work even if pools are not freed.
6578 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6579 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6580 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6581 __kmp_thread_pool = NULL;
6582 __kmp_thread_pool_insert_pt = NULL;
6583 __kmp_team_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006584
Jonathan Peyton30419822017-05-12 18:01:32 +00006585 /* Allocate all of the variable sized records */
6586 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
6587 * expandable */
6588 /* Since allocation is cache-aligned, just add extra padding at the end */
6589 size =
6590 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6591 CACHE_LINE;
6592 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6593 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6594 sizeof(kmp_info_t *) * __kmp_threads_capacity);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006595
Jonathan Peyton30419822017-05-12 18:01:32 +00006596 /* init thread counts */
6597 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6598 0); // Asserts fail if the library is reinitializing and
6599 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
6600 __kmp_all_nth = 0;
6601 __kmp_nth = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006602
Jonathan Peyton30419822017-05-12 18:01:32 +00006603 /* setup the uber master thread and hierarchy */
6604 gtid = __kmp_register_root(TRUE);
6605 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6606 KMP_ASSERT(KMP_UBER_GTID(gtid));
6607 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006608
Jonathan Peyton30419822017-05-12 18:01:32 +00006609 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006610
Jonathan Peyton30419822017-05-12 18:01:32 +00006611 __kmp_common_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006612
Jonathan Peyton30419822017-05-12 18:01:32 +00006613#if KMP_OS_UNIX
6614 /* invoke the child fork handler */
6615 __kmp_register_atfork();
6616#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006617
Jonathan Peyton30419822017-05-12 18:01:32 +00006618#if !defined KMP_DYNAMIC_LIB
6619 {
6620 /* Invoke the exit handler when the program finishes, only for static
6621 library. For dynamic library, we already have _fini and DllMain. */
6622 int rc = atexit(__kmp_internal_end_atexit);
6623 if (rc != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006624 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6625 __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006626 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006627 }
6628#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006629
Jonathan Peyton30419822017-05-12 18:01:32 +00006630#if KMP_HANDLE_SIGNALS
6631#if KMP_OS_UNIX
6632 /* NOTE: make sure that this is called before the user installs their own
6633 signal handlers so that the user handlers are called first. This way they
6634 can return false, not call our handler, avoid terminating the library, and
6635 continue execution where they left off. */
6636 __kmp_install_signals(FALSE);
6637#endif /* KMP_OS_UNIX */
6638#if KMP_OS_WINDOWS
6639 __kmp_install_signals(TRUE);
6640#endif /* KMP_OS_WINDOWS */
6641#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006642
Jonathan Peyton30419822017-05-12 18:01:32 +00006643 /* we have finished the serial initialization */
6644 __kmp_init_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006645
Jonathan Peyton30419822017-05-12 18:01:32 +00006646 __kmp_init_serial = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006647
Jonathan Peyton30419822017-05-12 18:01:32 +00006648 if (__kmp_settings) {
6649 __kmp_env_print();
6650 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006651
6652#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006653 if (__kmp_display_env || __kmp_display_env_verbose) {
6654 __kmp_env_print_2();
6655 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006656#endif // OMP_40_ENABLED
6657
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006658#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006659 ompt_post_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006660#endif
6661
Jonathan Peyton30419822017-05-12 18:01:32 +00006662 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006663
Jonathan Peyton30419822017-05-12 18:01:32 +00006664 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006665}
6666
Jonathan Peyton30419822017-05-12 18:01:32 +00006667void __kmp_serial_initialize(void) {
6668 if (__kmp_init_serial) {
6669 return;
6670 }
6671 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6672 if (__kmp_init_serial) {
6673 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6674 return;
6675 }
6676 __kmp_do_serial_initialize();
6677 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6678}
6679
6680static void __kmp_do_middle_initialize(void) {
6681 int i, j;
6682 int prev_dflt_team_nth;
6683
6684 if (!__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006685 __kmp_do_serial_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006686 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006687
Jonathan Peyton30419822017-05-12 18:01:32 +00006688 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006689
Jonathan Peyton30419822017-05-12 18:01:32 +00006690 // Save the previous value for the __kmp_dflt_team_nth so that
6691 // we can avoid some reinitialization if it hasn't changed.
6692 prev_dflt_team_nth = __kmp_dflt_team_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006693
Alp Toker98758b02014-03-02 04:12:06 +00006694#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006695 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6696 // number of cores on the machine.
6697 __kmp_affinity_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006698
Jonathan Peyton30419822017-05-12 18:01:32 +00006699 // Run through the __kmp_threads array and set the affinity mask
6700 // for each root thread that is currently registered with the RTL.
6701 for (i = 0; i < __kmp_threads_capacity; i++) {
6702 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6703 __kmp_affinity_set_init_mask(i, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006704 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006705 }
Alp Toker98758b02014-03-02 04:12:06 +00006706#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006707
Jonathan Peyton30419822017-05-12 18:01:32 +00006708 KMP_ASSERT(__kmp_xproc > 0);
6709 if (__kmp_avail_proc == 0) {
6710 __kmp_avail_proc = __kmp_xproc;
6711 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006712
Jonathan Peyton30419822017-05-12 18:01:32 +00006713 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
6714 // correct them now
6715 j = 0;
6716 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6717 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6718 __kmp_avail_proc;
6719 j++;
6720 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006721
Jonathan Peyton30419822017-05-12 18:01:32 +00006722 if (__kmp_dflt_team_nth == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006723#ifdef KMP_DFLT_NTH_CORES
Jonathan Peyton30419822017-05-12 18:01:32 +00006724 // Default #threads = #cores
6725 __kmp_dflt_team_nth = __kmp_ncores;
6726 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6727 "__kmp_ncores (%d)\n",
6728 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006729#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006730 // Default #threads = #available OS procs
6731 __kmp_dflt_team_nth = __kmp_avail_proc;
6732 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6733 "__kmp_avail_proc(%d)\n",
6734 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006735#endif /* KMP_DFLT_NTH_CORES */
Jonathan Peyton30419822017-05-12 18:01:32 +00006736 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006737
Jonathan Peyton30419822017-05-12 18:01:32 +00006738 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6739 __kmp_dflt_team_nth = KMP_MIN_NTH;
6740 }
6741 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6742 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6743 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006744
Jonathan Peyton30419822017-05-12 18:01:32 +00006745 // There's no harm in continuing if the following check fails,
6746 // but it indicates an error in the previous logic.
6747 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006748
Jonathan Peyton30419822017-05-12 18:01:32 +00006749 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6750 // Run through the __kmp_threads array and set the num threads icv for each
6751 // root thread that is currently registered with the RTL (which has not
6752 // already explicitly set its nthreads-var with a call to
6753 // omp_set_num_threads()).
6754 for (i = 0; i < __kmp_threads_capacity; i++) {
6755 kmp_info_t *thread = __kmp_threads[i];
6756 if (thread == NULL)
6757 continue;
6758 if (thread->th.th_current_task->td_icvs.nproc != 0)
6759 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006760
Jonathan Peyton30419822017-05-12 18:01:32 +00006761 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006762 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006763 }
6764 KA_TRACE(
6765 20,
6766 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6767 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006768
6769#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00006770 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
6771 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6772 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6773 if (__kmp_nth > __kmp_avail_proc) {
6774 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006775 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006776 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006777#endif /* KMP_ADJUST_BLOCKTIME */
6778
Jonathan Peyton30419822017-05-12 18:01:32 +00006779 /* we have finished middle initialization */
6780 TCW_SYNC_4(__kmp_init_middle, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006781
Jonathan Peyton30419822017-05-12 18:01:32 +00006782 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006783}
6784
Jonathan Peyton30419822017-05-12 18:01:32 +00006785void __kmp_middle_initialize(void) {
6786 if (__kmp_init_middle) {
6787 return;
6788 }
6789 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6790 if (__kmp_init_middle) {
6791 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6792 return;
6793 }
6794 __kmp_do_middle_initialize();
6795 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6796}
6797
6798void __kmp_parallel_initialize(void) {
6799 int gtid = __kmp_entry_gtid(); // this might be a new root
6800
6801 /* synchronize parallel initialization (for sibling) */
6802 if (TCR_4(__kmp_init_parallel))
6803 return;
6804 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6805 if (TCR_4(__kmp_init_parallel)) {
6806 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6807 return;
6808 }
6809
6810 /* TODO reinitialization after we have already shut down */
6811 if (TCR_4(__kmp_global.g.g_done)) {
6812 KA_TRACE(
6813 10,
6814 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
6815 __kmp_infinite_loop();
6816 }
6817
6818 /* jc: The lock __kmp_initz_lock is already held, so calling
6819 __kmp_serial_initialize would cause a deadlock. So we call
6820 __kmp_do_serial_initialize directly. */
6821 if (!__kmp_init_middle) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006822 __kmp_do_middle_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006823 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006824
Jonathan Peyton30419822017-05-12 18:01:32 +00006825 /* begin initialization */
6826 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
6827 KMP_ASSERT(KMP_UBER_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006828
6829#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00006830 // Save the FP control regs.
6831 // Worker threads will set theirs to these values at thread startup.
6832 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6833 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6834 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006835#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6836
6837#if KMP_OS_UNIX
Jonathan Peyton30419822017-05-12 18:01:32 +00006838#if KMP_HANDLE_SIGNALS
6839 /* must be after __kmp_serial_initialize */
6840 __kmp_install_signals(TRUE);
6841#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006842#endif
6843
Jonathan Peyton30419822017-05-12 18:01:32 +00006844 __kmp_suspend_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006845
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006846#if defined(USE_LOAD_BALANCE)
Jonathan Peyton30419822017-05-12 18:01:32 +00006847 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6848 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6849 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006850#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006851 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6852 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6853 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006854#endif
6855
Jonathan Peyton30419822017-05-12 18:01:32 +00006856 if (__kmp_version) {
6857 __kmp_print_version_2();
6858 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006859
Jonathan Peyton30419822017-05-12 18:01:32 +00006860 /* we have finished parallel initialization */
6861 TCW_SYNC_4(__kmp_init_parallel, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006862
Jonathan Peyton30419822017-05-12 18:01:32 +00006863 KMP_MB();
6864 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006865
Jonathan Peyton30419822017-05-12 18:01:32 +00006866 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006867}
6868
Jim Cownie5e8470a2013-09-27 10:38:44 +00006869/* ------------------------------------------------------------------------ */
6870
Jonathan Peyton30419822017-05-12 18:01:32 +00006871void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6872 kmp_team_t *team) {
6873 kmp_disp_t *dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006874
Jonathan Peyton30419822017-05-12 18:01:32 +00006875 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006876
Jonathan Peyton30419822017-05-12 18:01:32 +00006877 /* none of the threads have encountered any constructs, yet. */
6878 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006879#if KMP_CACHE_MANAGE
Jonathan Peyton30419822017-05-12 18:01:32 +00006880 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006881#endif /* KMP_CACHE_MANAGE */
Jonathan Peyton30419822017-05-12 18:01:32 +00006882 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6883 KMP_DEBUG_ASSERT(dispatch);
6884 KMP_DEBUG_ASSERT(team->t.t_dispatch);
6885 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
6886 // this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006887
Jonathan Peyton30419822017-05-12 18:01:32 +00006888 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006889#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006890 dispatch->th_doacross_buf_idx =
6891 0; /* reset the doacross dispatch buffer counter */
Jonathan Peyton71909c52016-03-02 22:42:06 +00006892#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006893 if (__kmp_env_consistency_check)
6894 __kmp_push_parallel(gtid, team->t.t_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006895
Jonathan Peyton30419822017-05-12 18:01:32 +00006896 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006897}
6898
Jonathan Peyton30419822017-05-12 18:01:32 +00006899void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6900 kmp_team_t *team) {
6901 if (__kmp_env_consistency_check)
6902 __kmp_pop_parallel(gtid, team->t.t_ident);
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006903
Jonathan Peyton30419822017-05-12 18:01:32 +00006904 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006905}
6906
Jonathan Peyton30419822017-05-12 18:01:32 +00006907int __kmp_invoke_task_func(int gtid) {
6908 int rc;
6909 int tid = __kmp_tid_from_gtid(gtid);
6910 kmp_info_t *this_thr = __kmp_threads[gtid];
6911 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006912
Jonathan Peyton30419822017-05-12 18:01:32 +00006913 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006914#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006915 if (__itt_stack_caller_create_ptr) {
6916 __kmp_itt_stack_callee_enter(
6917 (__itt_caller)
6918 team->t.t_stack_id); // inform ittnotify about entering user's code
6919 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006920#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006921#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006922 SSC_MARK_INVOKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006923#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006924
6925#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006926 void *dummy;
6927 void **exit_runtime_p;
6928 ompt_task_id_t my_task_id;
6929 ompt_parallel_id_t my_parallel_id;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006930
Jonathan Peyton30419822017-05-12 18:01:32 +00006931 if (ompt_enabled) {
6932 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
6933 .ompt_task_info.frame.exit_runtime_frame);
6934 } else {
6935 exit_runtime_p = &dummy;
6936 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006937
6938#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00006939 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6940 my_parallel_id = team->t.ompt_team_info.parallel_id;
6941 if (ompt_enabled &&
6942 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6943 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id,
6944 my_task_id);
6945 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006946#endif
6947#endif
6948
Jonathan Peyton30419822017-05-12 18:01:32 +00006949 {
6950 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6951 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6952 rc =
6953 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6954 tid, (int)team->t.t_argc, (void **)team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006955#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006956 ,
6957 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006958#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006959 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006960#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006961 *exit_runtime_p = NULL;
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006962#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006963 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006964
Jim Cownie5e8470a2013-09-27 10:38:44 +00006965#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006966 if (__itt_stack_caller_create_ptr) {
6967 __kmp_itt_stack_callee_leave(
6968 (__itt_caller)
6969 team->t.t_stack_id); // inform ittnotify about leaving user's code
6970 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006971#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00006972 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006973
Jonathan Peyton30419822017-05-12 18:01:32 +00006974 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006975}
6976
6977#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006978void __kmp_teams_master(int gtid) {
6979 // This routine is called by all master threads in teams construct
6980 kmp_info_t *thr = __kmp_threads[gtid];
6981 kmp_team_t *team = thr->th.th_team;
6982 ident_t *loc = team->t.t_ident;
6983 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6984 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
6985 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
6986 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
6987 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
6988// Launch the league of teams now, but do not let workers execute
6989// (they hang on fork barrier until next parallel)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006990#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006991 SSC_MARK_FORKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006992#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006993 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006994#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006995 (void *)thr->th.th_teams_microtask, // "unwrapped" task
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006996#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006997 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
6998 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006999#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00007000 SSC_MARK_JOINING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007001#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00007002
Jonathan Peyton30419822017-05-12 18:01:32 +00007003 // AC: last parameter "1" eliminates join barrier which won't work because
7004 // worker threads are in a fork barrier waiting for more parallel regions
7005 __kmp_join_call(loc, gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00007006#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007007 ,
7008 fork_context_intel
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00007009#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007010 ,
7011 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007012}
7013
Jonathan Peyton30419822017-05-12 18:01:32 +00007014int __kmp_invoke_teams_master(int gtid) {
7015 kmp_info_t *this_thr = __kmp_threads[gtid];
7016 kmp_team_t *team = this_thr->th.th_team;
7017#if KMP_DEBUG
7018 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7019 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7020 (void *)__kmp_teams_master);
7021#endif
7022 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7023 __kmp_teams_master(gtid);
7024 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7025 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007026}
7027#endif /* OMP_40_ENABLED */
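The two routines above are the runtime side of the OpenMP teams construct: each team's master thread runs __kmp_teams_master, which forks the league through __kmp_fork_call and joins it without a join barrier. A minimal user-level program that drives this path is sketched below; it assumes a compiler and runtime that support a host teams region, and the clause values are arbitrary examples.

#include <omp.h>
#include <stdio.h>

int main(void) {
  // Request a league of 2 teams with at most 4 threads each; the runtime may
  // clamp both values (see __kmp_push_num_teams below).
  #pragma omp teams num_teams(2) thread_limit(4)
  {
    #pragma omp parallel
    {
      #pragma omp master
      printf("team %d of %d running %d threads\n", omp_get_team_num(),
             omp_get_num_teams(), omp_get_num_threads());
    }
  }
  return 0;
}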
7028
7029/* this sets the requested number of threads for the next parallel region
Jonathan Peyton30419822017-05-12 18:01:32 +00007030 encountered by this team. since this should be enclosed in the forkjoin
7031 critical section it should avoid race conditions with asymmetrical nested
7032 parallelism */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007033
Jonathan Peyton30419822017-05-12 18:01:32 +00007034void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7035 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007036
Jonathan Peyton30419822017-05-12 18:01:32 +00007037 if (num_threads > 0)
7038 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007039}
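__kmp_push_num_threads only records the request on the encountering thread; the actual team size is resolved later at fork time. In compiler-generated code it is typically reached through the __kmpc_push_num_threads entry point emitted just before the fork for a num_threads clause. An illustrative user-level equivalent:

#include <omp.h>
#include <stdio.h>

int main(void) {
  // The num_threads clause pushes a request of 3 into the runtime before the
  // parallel region is forked; the runtime may still grant fewer threads.
  #pragma omp parallel num_threads(3)
  {
    #pragma omp single
    printf("requested 3, running with %d threads\n", omp_get_num_threads());
  }
  return 0;
}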
7040
7041#if OMP_40_ENABLED
7042
7043/* this sets the requested number of teams for the teams region and/or
Jonathan Peyton30419822017-05-12 18:01:32 +00007044 the number of threads for the next parallel region encountered */
7045void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7046 int num_threads) {
7047 kmp_info_t *thr = __kmp_threads[gtid];
7048 KMP_DEBUG_ASSERT(num_teams >= 0);
7049 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007050
Jonathan Peyton30419822017-05-12 18:01:32 +00007051 if (num_teams == 0)
7052 num_teams = 1; // default number of teams is 1.
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007053 if (num_teams > __kmp_teams_max_nth) { // if too many teams were requested
Jonathan Peyton30419822017-05-12 18:01:32 +00007054 if (!__kmp_reserve_warn) {
7055 __kmp_reserve_warn = 1;
7056 __kmp_msg(kmp_ms_warning,
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007057 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
Jonathan Peyton30419822017-05-12 18:01:32 +00007058 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007059 }
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007060 num_teams = __kmp_teams_max_nth;
Jonathan Peyton30419822017-05-12 18:01:32 +00007061 }
7062 // Set number of teams (number of threads in the outer "parallel" of the
7063 // teams)
7064 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007065
Jonathan Peyton30419822017-05-12 18:01:32 +00007066 // Remember the number of threads for inner parallel regions
7067 if (num_threads == 0) {
7068 if (!TCR_4(__kmp_init_middle))
7069 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
7070 num_threads = __kmp_avail_proc / num_teams;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007071 if (num_teams * num_threads > __kmp_teams_max_nth) {
Jonathan Peyton30419822017-05-12 18:01:32 +00007072 // adjust num_threads w/o warning as it is not a user setting
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007073 num_threads = __kmp_teams_max_nth / num_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007074 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007075 } else {
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007076 if (num_teams * num_threads > __kmp_teams_max_nth) {
7077 int new_threads = __kmp_teams_max_nth / num_teams;
Jonathan Peyton30419822017-05-12 18:01:32 +00007078 if (!__kmp_reserve_warn) { // user asked for too many threads
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007079 __kmp_reserve_warn = 1; // that conflicts with KMP_TEAMS_THREAD_LIMIT
Jonathan Peyton30419822017-05-12 18:01:32 +00007080 __kmp_msg(kmp_ms_warning,
7081 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7082 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7083 }
7084 num_threads = new_threads;
7085 }
7086 }
7087 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007088}
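The net effect of the clamping above: a missing num_teams defaults to 1, num_teams is capped at __kmp_teams_max_nth, a missing num_threads defaults to __kmp_avail_proc / num_teams, and in all cases num_teams * num_threads is kept at or below __kmp_teams_max_nth. The standalone sketch below mirrors only that arithmetic; the limits are made-up example values and the warning machinery is omitted.

#include <stdio.h>

// Mirrors the clamping in __kmp_push_num_teams; teams_max_nth and avail_proc
// stand in for __kmp_teams_max_nth and __kmp_avail_proc.
static void clamp_teams(int *num_teams, int *num_threads, int teams_max_nth,
                        int avail_proc) {
  if (*num_teams == 0)
    *num_teams = 1;                            // default number of teams is 1
  if (*num_teams > teams_max_nth)
    *num_teams = teams_max_nth;                // too many teams requested
  if (*num_threads == 0)
    *num_threads = avail_proc / *num_teams;    // spread available procs evenly
  if (*num_teams * *num_threads > teams_max_nth)
    *num_threads = teams_max_nth / *num_teams; // keep the product bounded
}

int main(void) {
  int teams = 10, threads = 0;
  clamp_teams(&teams, &threads, 64, 16);
  printf("teams=%d threads=%d\n", teams, threads); // prints: teams=10 threads=1
  return 0;
}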
7089
Jim Cownie5e8470a2013-09-27 10:38:44 +00007090// Set the proc_bind var to use in the following parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007091void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7092 kmp_info_t *thr = __kmp_threads[gtid];
7093 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007094}
7095
7096#endif /* OMP_40_ENABLED */
7097
7098/* Launch the worker threads into the microtask. */
7099
Jonathan Peyton30419822017-05-12 18:01:32 +00007100void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7101 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007102
7103#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007104 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007105#endif /* KMP_DEBUG */
7106
Jonathan Peyton30419822017-05-12 18:01:32 +00007107 KMP_DEBUG_ASSERT(team);
7108 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7109 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7110 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007111
Jonathan Peyton30419822017-05-12 18:01:32 +00007112 team->t.t_construct = 0; /* no single directives seen yet */
7113 team->t.t_ordered.dt.t_value =
7114 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007115
Jonathan Peyton30419822017-05-12 18:01:32 +00007116 /* Reset the identifiers on the dispatch buffer */
7117 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7118 if (team->t.t_max_nproc > 1) {
7119 int i;
7120 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7121 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007122#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007123 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00007124#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007125 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007126 } else {
7127 team->t.t_disp_buffer[0].buffer_index = 0;
7128#if OMP_45_ENABLED
7129 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7130#endif
7131 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007132
Jonathan Peyton30419822017-05-12 18:01:32 +00007133 KMP_MB(); /* Flush all pending memory write invalidates. */
7134 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007135
7136#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007137 for (f = 0; f < team->t.t_nproc; f++) {
7138 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7139 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7140 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007141#endif /* KMP_DEBUG */
7142
Jonathan Peyton30419822017-05-12 18:01:32 +00007143 /* release the worker threads so they may begin working */
7144 __kmp_fork_barrier(gtid, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007145}
7146
Jonathan Peyton30419822017-05-12 18:01:32 +00007147void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7148 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007149
Jonathan Peyton30419822017-05-12 18:01:32 +00007150 KMP_DEBUG_ASSERT(team);
7151 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7152 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7153 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007154
Jonathan Peyton30419822017-05-12 18:01:32 +00007155/* Join barrier after fork */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007156
7157#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007158 if (__kmp_threads[gtid] &&
7159 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7160 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7161 __kmp_threads[gtid]);
7162 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7163 "team->t.t_nproc=%d\n",
7164 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7165 team->t.t_nproc);
7166 __kmp_print_structure();
7167 }
7168 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7169 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007170#endif /* KMP_DEBUG */
7171
Jonathan Peyton30419822017-05-12 18:01:32 +00007172 __kmp_join_barrier(gtid); /* wait for everyone */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007173
Jonathan Peyton30419822017-05-12 18:01:32 +00007174 KMP_MB(); /* Flush all pending memory write invalidates. */
7175 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007176}
7177
Jim Cownie5e8470a2013-09-27 10:38:44 +00007178/* ------------------------------------------------------------------------ */
7179
7180#ifdef USE_LOAD_BALANCE
7181
Jim Cownie5e8470a2013-09-27 10:38:44 +00007182// Return the worker threads actively spinning in the hot team, if we
7183// are at the outermost level of parallelism. Otherwise, return 0.
Jonathan Peyton30419822017-05-12 18:01:32 +00007184static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7185 int i;
7186 int retval;
7187 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007188
Jonathan Peyton30419822017-05-12 18:01:32 +00007189 if (root->r.r_active) {
7190 return 0;
7191 }
7192 hot_team = root->r.r_hot_team;
7193 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7194 return hot_team->t.t_nproc - 1; // Don't count master thread
7195 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007196
Jonathan Peyton30419822017-05-12 18:01:32 +00007197 // Skip the master thread - it is accounted for elsewhere.
7198 retval = 0;
7199 for (i = 1; i < hot_team->t.t_nproc; i++) {
7200 if (hot_team->t.t_threads[i]->th.th_active) {
7201 retval++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007202 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007203 }
7204 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007205}
7206
Jim Cownie5e8470a2013-09-27 10:38:44 +00007207// Perform an automatic adjustment to the number of
7208// threads used by the next parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007209static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
7210 int retval;
7211 int pool_active;
7212 int hot_team_active;
7213 int team_curr_active;
7214 int system_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007215
Jonathan Peyton30419822017-05-12 18:01:32 +00007216 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7217 set_nproc));
7218 KMP_DEBUG_ASSERT(root);
7219 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7220 ->th.th_current_task->td_icvs.dynamic == TRUE);
7221 KMP_DEBUG_ASSERT(set_nproc > 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007222
Jonathan Peyton30419822017-05-12 18:01:32 +00007223 if (set_nproc == 1) {
7224 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
7225 return 1;
7226 }
7227
7228 // Threads that are active in the thread pool, active in the hot team for this
7229 // particular root (if we are at the outer par level), and the currently
7230 // executing thread (to become the master) are available to add to the new
7231 // team, but are currently contributing to the system load, and must be
7232 // accounted for.
7233 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7234 hot_team_active = __kmp_active_hot_team_nproc(root);
7235 team_curr_active = pool_active + hot_team_active + 1;
7236
7237 // Check the system load.
7238 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7239 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
7240 "hot team active = %d\n",
7241 system_active, pool_active, hot_team_active));
7242
7243 if (system_active < 0) {
7244 // There was an error reading the necessary info from /proc, so use the
7245 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
7246 // = dynamic_thread_limit, we shouldn't wind up getting back here.
7247 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7248 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
7249
7250 // Make this call behave like the thread limit algorithm.
7251 retval = __kmp_avail_proc - __kmp_nth +
7252 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7253 if (retval > set_nproc) {
7254 retval = set_nproc;
7255 }
7256 if (retval < KMP_MIN_NTH) {
7257 retval = KMP_MIN_NTH;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007258 }
7259
Jonathan Peyton30419822017-05-12 18:01:32 +00007260 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7261 retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007262 return retval;
Jonathan Peyton30419822017-05-12 18:01:32 +00007263 }
7264
7265 // There is a slight delay in the load balance algorithm in detecting new
7266 // running procs. The real system load at this instant should be at least as
7267 // large as the number of active OMP threads available to add to the team.
7268 if (system_active < team_curr_active) {
7269 system_active = team_curr_active;
7270 }
7271 retval = __kmp_avail_proc - system_active + team_curr_active;
7272 if (retval > set_nproc) {
7273 retval = set_nproc;
7274 }
7275 if (retval < KMP_MIN_NTH) {
7276 retval = KMP_MIN_NTH;
7277 }
7278
7279 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
7280 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007281} // __kmp_load_balance_nproc()
7282
7283#endif /* USE_LOAD_BALANCE */
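Put together, the heuristic above grants the next team roughly the processors the rest of the system is not already using: retval = __kmp_avail_proc - system_active + team_curr_active, where this root's own pool, hot-team, and master threads are added back because they are free to join the new team; the result is then clamped into [KMP_MIN_NTH, set_nproc]. A worked sketch with hypothetical numbers:

#include <stdio.h>

#define EXAMPLE_MIN_NTH 1  // stand-in for KMP_MIN_NTH

// Mirrors the formula used by __kmp_load_balance_nproc; all inputs in main()
// are hypothetical example values.
static int load_balance_nproc(int avail_proc, int system_active,
                              int team_curr_active, int set_nproc) {
  // The measured system load can lag; it is at least our own active threads.
  if (system_active < team_curr_active)
    system_active = team_curr_active;
  int retval = avail_proc - system_active + team_curr_active;
  if (retval > set_nproc)
    retval = set_nproc;          // never exceed the requested team size
  if (retval < EXAMPLE_MIN_NTH)
    retval = EXAMPLE_MIN_NTH;    // never go below the minimum team size
  return retval;
}

int main(void) {
  // 16 procs, 6 threads busy system-wide, 3 of them ours, 8 requested:
  printf("%d\n", load_balance_nproc(16, 6, 3, 8));  // prints 8
  // A busier system (14 active threads) shrinks the grant:
  printf("%d\n", load_balance_nproc(16, 14, 3, 8)); // prints 5
  return 0;
}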
7284
Jim Cownie5e8470a2013-09-27 10:38:44 +00007285/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007286
7287/* NOTE: this is called with the __kmp_init_lock held */
Jonathan Peyton30419822017-05-12 18:01:32 +00007288void __kmp_cleanup(void) {
7289 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007290
Jonathan Peyton30419822017-05-12 18:01:32 +00007291 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007292
Jonathan Peyton30419822017-05-12 18:01:32 +00007293 if (TCR_4(__kmp_init_parallel)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007294#if KMP_HANDLE_SIGNALS
Jonathan Peyton30419822017-05-12 18:01:32 +00007295 __kmp_remove_signals();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007296#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007297 TCW_4(__kmp_init_parallel, FALSE);
7298 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007299
Jonathan Peyton30419822017-05-12 18:01:32 +00007300 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007301#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00007302 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007303#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton30419822017-05-12 18:01:32 +00007304 __kmp_cleanup_hierarchy();
7305 TCW_4(__kmp_init_middle, FALSE);
7306 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007307
Jonathan Peyton30419822017-05-12 18:01:32 +00007308 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007309
Jonathan Peyton30419822017-05-12 18:01:32 +00007310 if (__kmp_init_serial) {
7311 __kmp_runtime_destroy();
7312 __kmp_init_serial = FALSE;
7313 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007314
Jonathan Peyton30419822017-05-12 18:01:32 +00007315 for (f = 0; f < __kmp_threads_capacity; f++) {
7316 if (__kmp_root[f] != NULL) {
7317 __kmp_free(__kmp_root[f]);
7318 __kmp_root[f] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007319 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007320 }
7321 __kmp_free(__kmp_threads);
7322 // __kmp_threads and __kmp_root were allocated at once, as a single block, so
7323 // there is no need to free __kmp_root separately.
7324 __kmp_threads = NULL;
7325 __kmp_root = NULL;
7326 __kmp_threads_capacity = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007327
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007328#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00007329 __kmp_cleanup_indirect_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007330#else
Jonathan Peyton30419822017-05-12 18:01:32 +00007331 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007332#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007333
Jonathan Peyton30419822017-05-12 18:01:32 +00007334#if KMP_AFFINITY_SUPPORTED
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00007335 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
Jonathan Peyton30419822017-05-12 18:01:32 +00007336 __kmp_cpuinfo_file = NULL;
7337#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007338
Jonathan Peyton30419822017-05-12 18:01:32 +00007339#if KMP_USE_ADAPTIVE_LOCKS
7340#if KMP_DEBUG_ADAPTIVE_LOCKS
7341 __kmp_print_speculative_stats();
7342#endif
7343#endif
7344 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7345 __kmp_nested_nth.nth = NULL;
7346 __kmp_nested_nth.size = 0;
7347 __kmp_nested_nth.used = 0;
7348 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7349 __kmp_nested_proc_bind.bind_types = NULL;
7350 __kmp_nested_proc_bind.size = 0;
7351 __kmp_nested_proc_bind.used = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007352
Jonathan Peyton30419822017-05-12 18:01:32 +00007353 __kmp_i18n_catclose();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007354
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007355#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007356 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007357#endif
7358
Jonathan Peyton30419822017-05-12 18:01:32 +00007359 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007360}
7361
7362/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00007363
7364int __kmp_ignore_mppbeg(void) {
7365 char *env;
7366
7367 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
7368 if (__kmp_str_match_false(env))
7369 return FALSE;
7370 }
7371 // By default __kmpc_begin() is a no-op.
7372 return TRUE;
7373}
7374
7375int __kmp_ignore_mppend(void) {
7376 char *env;
7377
7378 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
7379 if (__kmp_str_match_false(env))
7380 return FALSE;
7381 }
7382 // By default __kmpc_end() is a no-op.
7383 return TRUE;
7384}
7385
7386void __kmp_internal_begin(void) {
7387 int gtid;
7388 kmp_root_t *root;
7389
7390 /* this is a very important step as it will register new sibling threads
7391 and assign these new uber threads a new gtid */
7392 gtid = __kmp_entry_gtid();
7393 root = __kmp_threads[gtid]->th.th_root;
7394 KMP_ASSERT(KMP_UBER_GTID(gtid));
7395
7396 if (root->r.r_begin)
7397 return;
7398 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7399 if (root->r.r_begin) {
7400 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7401 return;
7402 }
7403
7404 root->r.r_begin = TRUE;
7405
7406 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7407}
7408
Jim Cownie5e8470a2013-09-27 10:38:44 +00007409/* ------------------------------------------------------------------------ */
7410
Jonathan Peyton30419822017-05-12 18:01:32 +00007411void __kmp_user_set_library(enum library_type arg) {
7412 int gtid;
7413 kmp_root_t *root;
7414 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007415
Jonathan Peyton30419822017-05-12 18:01:32 +00007416 /* first, make sure we are initialized so we can get our gtid */
7417
7418 gtid = __kmp_entry_gtid();
7419 thread = __kmp_threads[gtid];
7420
7421 root = thread->th.th_root;
7422
7423 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7424 library_serial));
7425 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
7426 thread */
7427 KMP_WARNING(SetLibraryIncorrectCall);
7428 return;
7429 }
7430
7431 switch (arg) {
7432 case library_serial:
7433 thread->th.th_set_nproc = 0;
7434 set__nproc(thread, 1);
7435 break;
7436 case library_turnaround:
7437 thread->th.th_set_nproc = 0;
7438 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7439 : __kmp_dflt_team_nth_ub);
7440 break;
7441 case library_throughput:
7442 thread->th.th_set_nproc = 0;
7443 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7444 : __kmp_dflt_team_nth_ub);
7445 break;
7446 default:
7447 KMP_FATAL(UnknownLibraryType, arg);
7448 }
7449
7450 __kmp_aux_set_library(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007451}
7452
Jonathan Peyton30419822017-05-12 18:01:32 +00007453void __kmp_aux_set_stacksize(size_t arg) {
7454 if (!__kmp_init_serial)
7455 __kmp_serial_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007456
7457#if KMP_OS_DARWIN
Jonathan Peyton30419822017-05-12 18:01:32 +00007458 if (arg & (0x1000 - 1)) {
7459 arg &= ~(0x1000 - 1);
7460 if (arg + 0x1000) /* check for overflow if we round up */
7461 arg += 0x1000;
7462 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007463#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007464 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007465
Jonathan Peyton30419822017-05-12 18:01:32 +00007466 /* only change the default stacksize before the first parallel region */
7467 if (!TCR_4(__kmp_init_parallel)) {
7468 size_t value = arg; /* argument is in bytes */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007469
Jonathan Peyton30419822017-05-12 18:01:32 +00007470 if (value < __kmp_sys_min_stksize)
7471 value = __kmp_sys_min_stksize;
7472 else if (value > KMP_MAX_STKSIZE)
7473 value = KMP_MAX_STKSIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007474
Jonathan Peyton30419822017-05-12 18:01:32 +00007475 __kmp_stksize = value;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007476
Jonathan Peyton30419822017-05-12 18:01:32 +00007477 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7478 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007479
Jonathan Peyton30419822017-05-12 18:01:32 +00007480 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007481}
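Two properties of the routine above are easy to miss: on macOS the request is rounded up to a 4 KiB boundary, and the default stack size can only be changed before the first parallel region is initialized; after that the call is silently ignored. The usual ways in are the OMP_STACKSIZE/KMP_STACKSIZE environment variables or the kmp_set_stacksize_s() extension, which therefore has to run early. A hedged sketch, with the extension prototypes declared locally in case the installed omp.h does not expose them:

#include <omp.h>
#include <stddef.h>
#include <stdio.h>

// Assumed runtime extension entry points, declared here for the sketch.
void kmp_set_stacksize_s(size_t bytes);
size_t kmp_get_stacksize_s(void);

int main(void) {
  kmp_set_stacksize_s(8 * 1024 * 1024); // request 8 MiB worker stacks, early
  #pragma omp parallel
  {
    #pragma omp single
    printf("worker stack size: %zu bytes\n", kmp_get_stacksize_s());
  }
  return 0;
}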
7482
7483/* set the behaviour of the runtime library */
7484/* TODO this can cause some odd behaviour with sibling parallelism... */
Jonathan Peyton30419822017-05-12 18:01:32 +00007485void __kmp_aux_set_library(enum library_type arg) {
7486 __kmp_library = arg;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007487
Jonathan Peyton30419822017-05-12 18:01:32 +00007488 switch (__kmp_library) {
7489 case library_serial: {
7490 KMP_INFORM(LibraryIsSerial);
7491 (void)__kmp_change_library(TRUE);
7492 } break;
7493 case library_turnaround:
7494 (void)__kmp_change_library(TRUE);
7495 break;
7496 case library_throughput:
7497 (void)__kmp_change_library(FALSE);
7498 break;
7499 default:
7500 KMP_FATAL(UnknownLibraryType, arg);
7501 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007502}
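__kmp_user_set_library and __kmp_aux_set_library together implement the runtime's execution-mode switch (serial, turnaround, throughput), normally reached through KMP_LIBRARY or the kmp_set_library_* extension calls. A hedged sketch; the extension prototypes are assumptions and declared locally rather than taken from omp.h:

#include <omp.h>
#include <stdio.h>

// Assumed extension entry points that route into __kmp_user_set_library.
void kmp_set_library_turnaround(void);
void kmp_set_library_throughput(void);

int main(void) {
  kmp_set_library_turnaround(); // dedicated machine: keep workers spinning
  #pragma omp parallel
  { /* latency-sensitive phase */ }

  kmp_set_library_throughput(); // shared machine: yield processors when idle
  #pragma omp parallel
  { /* throughput-oriented phase */ }

  printf("done\n");
  return 0;
}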
7503
7504/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007505
Jonathan Peyton30419822017-05-12 18:01:32 +00007506void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
7507 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007508#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007509 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007510#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007511 int bt_set;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007512
Jonathan Peyton30419822017-05-12 18:01:32 +00007513 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007514
Jonathan Peyton30419822017-05-12 18:01:32 +00007515 /* Normalize and set blocktime for the teams */
7516 if (blocktime < KMP_MIN_BLOCKTIME)
7517 blocktime = KMP_MIN_BLOCKTIME;
7518 else if (blocktime > KMP_MAX_BLOCKTIME)
7519 blocktime = KMP_MAX_BLOCKTIME;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007520
Jonathan Peyton30419822017-05-12 18:01:32 +00007521 set__blocktime_team(thread->th.th_team, tid, blocktime);
7522 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007523
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007524#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007525 /* Calculate and set blocktime intervals for the teams */
7526 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007527
Jonathan Peyton30419822017-05-12 18:01:32 +00007528 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
7529 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007530#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007531
Jonathan Peyton30419822017-05-12 18:01:32 +00007532 /* Set whether blocktime has been set to "TRUE" */
7533 bt_set = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007534
Jonathan Peyton30419822017-05-12 18:01:32 +00007535 set__bt_set_team(thread->th.th_team, tid, bt_set);
7536 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007537#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007538 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
7539 "bt_intervals=%d, monitor_updates=%d\n",
7540 __kmp_gtid_from_tid(tid, thread->th.th_team),
7541 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7542 __kmp_monitor_wakeups));
Samuel Antao33515192016-10-20 13:20:17 +00007543#else
Jonathan Peyton30419822017-05-12 18:01:32 +00007544 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7545 __kmp_gtid_from_tid(tid, thread->th.th_team),
7546 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007547#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007548}
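Blocktime is how long an idle worker keeps spinning before it goes to sleep; the routine above clamps the value into [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] and stores it in both the current and the serial team's controls. It is normally reached via KMP_BLOCKTIME or the kmp_set_blocktime() extension, sketched below with locally declared prototypes in case omp.h does not expose them:

#include <omp.h>
#include <stdio.h>

// Assumed extension entry points serviced by __kmp_aux_set_blocktime.
void kmp_set_blocktime(int msec);
int kmp_get_blocktime(void);

int main(void) {
  kmp_set_blocktime(0); // like KMP_BLOCKTIME=0: idle workers sleep right away
  #pragma omp parallel
  {
    #pragma omp single
    printf("blocktime is now %d ms (team of %d)\n", kmp_get_blocktime(),
           omp_get_num_threads());
  }
  return 0;
}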
7549
Jonathan Peyton30419822017-05-12 18:01:32 +00007550void __kmp_aux_set_defaults(char const *str, int len) {
7551 if (!__kmp_init_serial) {
7552 __kmp_serial_initialize();
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00007553 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007554 __kmp_env_initialize(str);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007555
Jonathan Peyton30419822017-05-12 18:01:32 +00007556 if (__kmp_settings
Jim Cownie5e8470a2013-09-27 10:38:44 +00007557#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007558 || __kmp_display_env || __kmp_display_env_verbose
Jim Cownie5e8470a2013-09-27 10:38:44 +00007559#endif // OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007560 ) {
7561 __kmp_env_print();
7562 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007563} // __kmp_aux_set_defaults
7564
7565/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00007566/* internal fast reduction routines */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007567
Jim Cownie5e8470a2013-09-27 10:38:44 +00007568PACKED_REDUCTION_METHOD_T
Jonathan Peyton30419822017-05-12 18:01:32 +00007569__kmp_determine_reduction_method(
7570 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
7571 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7572 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007573
Jonathan Peyton30419822017-05-12 18:01:32 +00007574 // Default reduction method: critical construct ( lck != NULL, like in current
7575 // PAROPT )
7576 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
7577 // can be selected by RTL
7578 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
7579 // can be selected by RTL
7580 // Finally, it's up to OpenMP RTL to make a decision on which method to select
7581 // among generated by PAROPT.
Jim Cownie5e8470a2013-09-27 10:38:44 +00007582
Jonathan Peyton30419822017-05-12 18:01:32 +00007583 PACKED_REDUCTION_METHOD_T retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007584
Jonathan Peyton30419822017-05-12 18:01:32 +00007585 int team_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007586
Jonathan Peyton30419822017-05-12 18:01:32 +00007587 KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
7588 KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
Jim Cownie5e8470a2013-09-27 10:38:44 +00007589
Jonathan Peyton30419822017-05-12 18:01:32 +00007590#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
7591 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
7592#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
Jim Cownie5e8470a2013-09-27 10:38:44 +00007593
Jonathan Peyton30419822017-05-12 18:01:32 +00007594 retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007595
Jonathan Peyton30419822017-05-12 18:01:32 +00007596 // another way of getting the team size (with 1 dynamic dereference) is slower
7597 team_size = __kmp_get_team_num_threads(global_tid);
7598 if (team_size == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007599
Jonathan Peyton30419822017-05-12 18:01:32 +00007600 retval = empty_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007601
Jonathan Peyton30419822017-05-12 18:01:32 +00007602 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007603
Jonathan Peyton30419822017-05-12 18:01:32 +00007604 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7605 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007606
Jonathan Peyton30419822017-05-12 18:01:32 +00007607#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007608
Jonathan Peyton30419822017-05-12 18:01:32 +00007609#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \
7610 KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007611
Jonathan Peyton30419822017-05-12 18:01:32 +00007612 int teamsize_cutoff = 4;
Jonathan Peyton91b78702015-06-08 19:39:07 +00007613
Jonathan Peyton492e0a32017-06-13 17:17:26 +00007614#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00007615 if (__kmp_mic_type != non_mic) {
7616 teamsize_cutoff = 8;
7617 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007618#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007619 if (tree_available) {
7620 if (team_size <= teamsize_cutoff) {
7621 if (atomic_available) {
7622 retval = atomic_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007623 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007624 } else {
7625 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7626 }
7627 } else if (atomic_available) {
7628 retval = atomic_reduce_block;
7629 }
7630#else
7631#error "Unknown or unsupported OS"
7632#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
7633// KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007634
Jonathan Peyton30419822017-05-12 18:01:32 +00007635#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7636
7637#if KMP_OS_LINUX || KMP_OS_WINDOWS
7638
7639 // basic tuning
7640
7641 if (atomic_available) {
7642 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
7643 retval = atomic_reduce_block;
7644 }
7645 } // otherwise: use critical section
7646
7647#elif KMP_OS_DARWIN
7648
7649 if (atomic_available && (num_vars <= 3)) {
7650 retval = atomic_reduce_block;
7651 } else if (tree_available) {
7652 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
7653 (reduce_size < (2000 * sizeof(kmp_real64)))) {
7654 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7655 }
7656 } // otherwise: use critical section
7657
7658#else
7659#error "Unknown or unsupported OS"
7660#endif
7661
7662#else
7663#error "Unknown or unsupported architecture"
7664#endif
7665 }
7666
7667 // KMP_FORCE_REDUCTION
7668
7669 // If the team is serialized (team_size == 1), ignore the forced reduction
7670 // method and stay with the unsynchronized method (empty_reduce_block)
7671 if (__kmp_force_reduction_method != reduction_method_not_defined &&
7672 team_size != 1) {
7673
7674 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
7675
7676 int atomic_available, tree_available;
7677
7678 switch ((forced_retval = __kmp_force_reduction_method)) {
7679 case critical_reduce_block:
7680 KMP_ASSERT(lck); // lck should be != 0
7681 break;
7682
7683 case atomic_reduce_block:
7684 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7685 if (!atomic_available) {
7686 KMP_WARNING(RedMethodNotSupported, "atomic");
7687 forced_retval = critical_reduce_block;
7688 }
7689 break;
7690
7691 case tree_reduce_block:
7692 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7693 if (!tree_available) {
7694 KMP_WARNING(RedMethodNotSupported, "tree");
7695 forced_retval = critical_reduce_block;
7696 } else {
7697#if KMP_FAST_REDUCTION_BARRIER
7698 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7699#endif
7700 }
7701 break;
7702
7703 default:
7704 KMP_ASSERT(0); // "unsupported method specified"
Jim Cownie5e8470a2013-09-27 10:38:44 +00007705 }
7706
Jonathan Peyton30419822017-05-12 18:01:32 +00007707 retval = forced_retval;
7708 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007709
Jonathan Peyton30419822017-05-12 18:01:32 +00007710 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007711
Jonathan Peyton30419822017-05-12 18:01:32 +00007712#undef FAST_REDUCTION_TREE_METHOD_GENERATED
7713#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7714
7715 return (retval);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007716}
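In short: a serialized team gets the unsynchronized empty method; the compiler's hints (atomic reduction code, tree reduction data/function) plus team size and platform pick between the atomic and tree methods; everything else falls back to a critical section, and the KMP_FORCE_REDUCTION setting can override the choice for non-serialized teams. Any ordinary reduction clause reaches this selection at run time through the __kmpc_reduce entry points, as in this illustrative example:

#include <omp.h>
#include <stdio.h>

int main(void) {
  long sum = 0;
  // The reduction clause lets the runtime choose critical, atomic, or tree
  // reduction via __kmp_determine_reduction_method.
  #pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < 1000; ++i)
    sum += i;
  printf("sum = %ld\n", sum); // 499500
  return 0;
}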
7717
7718// this function is for testing set/get/determine reduce method
Jonathan Peyton30419822017-05-12 18:01:32 +00007719kmp_int32 __kmp_get_reduce_method(void) {
7720 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007721}