/*
 * kmp_runtime.c -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_affinity.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread. */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */

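/* Illustrative sketch (informal summary, not used by the build): the gtid lookup below
   falls back through three mechanisms, roughly:

       if      (__kmp_gtid_mode >= 3)  read the __thread (TDATA) variable __kmp_gtid   (fastest)
       else if (__kmp_gtid_mode >= 2)  read the keyed-TLS value via __kmp_gtid_get_specific()
       else                            scan __kmp_threads[] and match the current stack
                                       address against each thread's recorded stack range

   The mode thresholds are taken from the code that follows; the summary itself is only
   an aid for reading that code. */
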
int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t **other_threads;
    size_t stack_data;
    char *stack_addr;
    size_t stack_size;
    char *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
       parallel region, made it return KMP_GTID_DNE to force serial_initialize by
       caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
       __kmp_init_gtid for this to work. */

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    /*
        ATT: The code below is a source of potential bugs due to unsynchronized access to
        __kmp_threads array. For example:
        1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
        2. Current thread is suspended by OS.
        3. Another thread unregisters and finishes (debug versions of free() may fill memory
           with something like 0xEF).
        4. Current thread is resumed.
        5. Current thread reads junk from *thr.
        TODO: Fix it.
        --ln
    */

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated */
                /* stack size is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

    /* if we haven't been assigned a gtid, then return the code */
    if( i<0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
          other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}
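
/* Illustrative sketch (informal, not used by the build): the stack-window refinement
   above effectively grows the recorded [stackbase - stacksize, stackbase] interval for
   an uber thread so that later address checks succeed, i.e. roughly

       if (stack_addr > stack_base)    // window grew upward
           stacksize += stack_addr - stack_base, stackbase = stack_addr;
       else                            // window grew downward
           stacksize = stack_base - stack_addr;

   which mirrors the TCW_PTR() updates performed a few lines earlier. */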

int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
        /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
    }

    KMP_DEBUG_ASSERT( gtid >=0 );

    return gtid;
}

/* caller must hold forkjoin_lock */
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                     "th_%s stack (%s)", "mon",
                                     ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                     "th_%d stack (%s)", gtid,
                                     ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                            (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                            "th_%d stack (overlapped)",
                            __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}
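
/* Note (descriptive only, not used by the build): the overlap test above is a plain
   interval check between this thread's [stack_beg, stack_end) and every other
   registered thread's [other_stack_beg, other_stack_end); two stacks are reported as
   overlapping when either endpoint of one range falls strictly inside the other. */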


/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */

void
__kmp_infinite_loop( void )
{
    static int done = FALSE;

    while (! done) {
        KMP_YIELD( 1 );
    }
}

#define MAX_MESSAGE 512

void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];
    va_list ap;

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    int node;
    if(gtid >= 0) {
        if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
            if( __kmp_storage_map_verbose ) {
                node = __kmp_get_host_node(p1);
                if(node < 0)  /* doesn't work, so don't try this next time */
                    __kmp_storage_map_verbose = FALSE;
                else {
                    char *last;
                    int lastNode;
                    int localProc = __kmp_get_cpu_from_gtid(gtid);

                    const int page_size = KMP_GET_PAGE_SIZE();

                    p1 = (void *)( (size_t)p1 & ~((size_t)page_size - 1) );
                    p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)page_size - 1) );
                    if(localProc >= 0)
                        __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid, localProc>>1);
                    else
                        __kmp_printf_no_lock("  GTID %d\n", gtid);
# if KMP_USE_PRCTL
/* The more elaborate format is disabled for now because of the prctl hanging bug. */
                    do {
                        last = p1;
                        lastNode = node;
                        /* This loop collates adjacent pages with the same host node. */
                        do {
                            (char*)p1 += page_size;
                        } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
                                             (char*)p1 - 1, lastNode);
                    } while(p1 <= p2);
# else
                    __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                                         (char*)p1 + (page_size - 1), __kmp_get_host_node(p1));
                    if(p1 < p2)  {
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                             (char*)p2 + (page_size - 1), __kmp_get_host_node(p2));
                    }
# endif
                }
            }
        } else
            __kmp_printf_no_lock("  %s\n", KMP_I18N_STR( StorageMapWarning ) );
    }
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}

void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}

void
__kmp_abort_process()
{

    // Later threads may stall here, but that's ok because abort() will kill them.
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }; // if

    if ( KMP_OS_WINDOWS ) {
        // Let other threads know of abnormal termination and prevent deadlock
        // if abort happened during library initialization or shutdown
        __kmp_global.g.g_abort = SIGABRT;

        /*
            On Windows* OS, by default abort() causes a pop-up error box, which stalls nightly
            testing. Unfortunately, we cannot reliably suppress pop-up error boxes.
            _set_abort_behavior() works well, but this function is not available in VS7 (this is
            not a problem for the DLL, but it is a problem for the static OpenMP RTL).
            SetErrorMode (and so, the timelimit utility) does not help, at least in some versions
            of the MS C RTL.

            It seems the following sequence is the only way to simulate abort() and avoid the
            pop-up error box.
        */
        raise( SIGABRT );
        _exit( 3 ); // Just in case, if signal ignored, exit anyway.
    } else {
        abort();
    }; // if

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

} // __kmp_abort_process

void
__kmp_abort_thread( void )
{
    // TODO: Eliminate g_abort global variable and this function.
    // In case of abort just call abort(), it will kill all the threads.
    __kmp_infinite_loop();
} // __kmp_abort_thread

/* ------------------------------------------------------------------------ */

/*
 * Print out the storage map for the major kmp_info_t thread data structures
 * that are allocated together.
 */

static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                             "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                             "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                             sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                             &thr->th.th_bar[bs_plain_barrier+1],
                             sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                             &thr->th.th_bar[bs_forkjoin_barrier+1],
                             sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                             &thr->th.th_bar[bs_reduction_barrier+1],
                             sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
    #endif // KMP_FAST_REDUCTION_BARRIER
}

/*
 * Print out the storage map for the major kmp_team_t team data structures
 * that are allocated together.
 */

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                             header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                             sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                             sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                             sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                             sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
    #endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                             sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                             sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                             sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                             header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                             sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    // TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
    // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
    // However, in fact, some threads may still be alive here, although they are about to be terminated.
    // The threads in the array with ds_thread==0 are most suspicious.
    // Actually, it may not be safe to access __kmp_threads[].

    // TODO: does it make sense to check __kmp_roots[] ?

    // Let's check that there are no other alive threads registered with the OMP lib.
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Assume that I'm alone.

    // Now it is probably safe to check and reset the locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
    #ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
    #endif // KMP_DEBUG
}

BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
    //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));

            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
            {
                // lpReserved is used for telling the difference:
                //   lpReserved == NULL when FreeLibrary() was called,
                //   lpReserved != NULL when the process terminates.
                // When FreeLibrary() is called, worker threads remain alive.
                // So they will release the forkjoin lock by themselves.
                // When the process terminates, worker threads disappear, triggering
                // the problem of an unreleased forkjoin lock as described below.

                // A worker thread can take the forkjoin lock.
                // The problem comes up if that worker thread becomes dead
                // before it releases the forkjoin lock.
                // The forkjoin lock remains taken, while the thread
                // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
                // will try to take the forkjoin lock and will always fail,
                // so that the application will never finish [normally].
                // This scenario is possible if __kmpc_end() has not been executed.
                // It looks like this is not a corner case, but happens in common cases:
                //   - the main function was compiled by an alternative compiler;
                //   - the main function was compiled by icl but without /Qopenmp (application with plugins);
                //   - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
                //   - alive foreign thread prevented __kmpc_end from doing cleanup.

                // This is a hack to work around the problem.
                // TODO: !!! to figure out something better.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));

            /* if we wanted to register new siblings all the time here call
             * __kmp_get_gtid(); */
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */


/* ------------------------------------------------------------------------ */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  // check whether KMP_LIBRARY=throughput (even init count)

    if (status) {
        __kmp_yield_init |= 1;  // throughput => turnaround (odd init count)
    }
    else {
        __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
    }

    return old_status;  // return previous setting of whether KMP_LIBRARY=throughput
}
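
/* Note (descriptive only, not used by the build): the KMP_LIBRARY selection is encoded
   in the parity of __kmp_yield_init, as the comments above say: an even initial yield
   count means "throughput", an odd one means "turnaround". __kmp_change_library()
   therefore only needs to set or clear bit 0 and report the previous parity. */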

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* __kmp_parallel_deo --
 * Wait until it's our turn.
 */
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_MB();
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
        KMP_MB();
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo --
 * Signal the next task.
 */

void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        KMP_MB();       /* Flush all pending memory write invalidates. */

        /* use the tid of the next thread in this team */
        /* TODO: replace with general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB();       /* Flush all pending memory write invalidates. */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* The BARRIER for a SINGLE process section is always explicit */

int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the
           single block
        */
        /* TODO: Should this be acquire or release? */
        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);
        }
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        {   // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}


/*
 * determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 1 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller.
 */
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
   int master_tid, int set_nthreads
#if OMP_40_ENABLED
  , int enter_teams
#endif /* OMP_40_ENABLED */
)
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    //
    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    //
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
          : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
        else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                  master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                  master_tid, new_nthreads ));
            }
        }
    }
    else {
        KMP_ASSERT( 0 );
    }

    //
    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    //
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        //
        // If dyn-var is false, emit a 1-time warning.
        //
        if ( ! get__dynamic_2( parent_team, master_tid )
          && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
          master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    //
    // Check if the threads array is large enough, or needs expanding.
    //
    // See comment in __kmp_register_root() about the adjustment if
    // __kmp_threads[0] == NULL.
    //
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > capacity ) {
        //
        // Expand the threads array.
        //
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            //
            // The threads array was not expanded enough.
            //
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            //
            // If dyn-var is false, emit a 1-time warning.
            //
            if ( ! get__dynamic_2( parent_team, master_tid )
              && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
                else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
          __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
      __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}
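
/* Illustrative sketch (informal, not used by the build): the thread-limit clamp above
   boils down to

       in_use       = __kmp_nth - (root active ? 1 : hot_team_nproc);
       tl_nthreads  = __kmp_max_nth - in_use;              // what the limit still allows
       new_nthreads = min(new_nthreads, max(tl_nthreads, 1));

   written out long-hand in the code; the hot team's existing workers are subtracted so
   they are not double-counted, since they will be reused rather than newly created. */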

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within critical section forkjoin */

static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int i;
    int use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
    KMP_MB();

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1; // index in array of hot teams
        if( master_th->th.th_teams_microtask ) { // are we inside the teams?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level; // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level; // level was not increased in teams construct for team_of_workers before the parallel
            }            // team->t.t_level will be increased inside parallel
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1; // the team is ready to use
            } else {
                use_hot_team = 0; // AC: threads are not allocated yet
                hot_teams[level].hot_team = team; // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        } else {
            use_hot_team = 0;
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;
#endif
            { // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }; // for b
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif

    }

    KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
//
// Propagate any changes to the floating point control registers out to the team.
// We try to avoid unnecessary writes to the relevant cache line in the team structure,
// so we don't make changes unless they are needed.
//
inline static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        // There is no point looking at t_fp_control_saved here.
        // If it is TRUE, we still have to update the values if they are different from those we now have.
        // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
        // that the values in the team are the same as those we have.
        // So, this code achieves what we need whether or not t_fp_control_saved is true.
        // By checking whether the value needs updating we avoid unnecessary writes that would put the
        // cache-line into a written state, causing all threads in the team to have to read it again.
        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
        // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
        // So we must ensure it is correct.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    }
    else {
        // Similarly here. Don't write to this cache-line in the team structure unless we have to.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
    }
}

// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed in the team,
        // i.e. in the parallel region that we are exiting.
        //
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
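
/* Note (informal, not used by the build, and partly an assumption about callers outside
   this excerpt): propagateFPControl() is invoked by the master when a team is being set
   up (for example from __kmp_serialized_parallel() below), recording the master's
   x87/MXCSR settings in the team, while updateHWFPControl() is used when a region is
   torn down so those settings can be restored from the team. On non-x86 targets both
   collapse to no-ops via the macros above. */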

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration

/*
 * Run a parallel region that has been serialized, so runs only in a team of the single master thread.
 */
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    /* Skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        //
        // No proc_bind clause was specified, so use the current value
        // of proc-bind-var for this parallel region.
        //
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    //
    // Reset for next parallel region
    //
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used
             * TODO increase performance by making these locks more specific */
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                           ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                           proc_bind,
#endif
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );


            /* TODO the above breaks the requirement that if we run out of
             * resources, then we can still guarantee that serialized teams
             * are ok, since we may need to allocate a new one */
        } else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
           each serialized task represented by team->t.t_serialized? */
        copy_icvs(
            & this_thr->th.th_current_task->td_icvs,
            & this_thr->th.th_current_task->td_parent->td_icvs );

        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }

#if OMP_40_ENABLED
        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];
        }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
        serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
#endif
        this_thr->th.th_info.ds.ds_tid = 0;

        /* set thread cache values */
        this_thr->th.th_team_nproc      = 1;
        this_thr->th.th_team_master     = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level        = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl (serial_team);

        /* check if we need to allocate dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

        KMP_MB();

    } else {
        /* this serialized team is already being used,
         * that's fine, just add another nested level */
        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;
        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        /* allocate/push dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        {
            dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
            disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
            serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        KMP_MB();
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

}
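
/* Note (informal, not used by the build; the teardown side is outside this excerpt and
   described here as an assumption): each nesting of a serialized parallel region pushes
   one dispatch_private_info_t onto serial_team->t.t_dispatch->th_disp_buffer, forming a
   singly linked stack via ->next as allocated above, while t_serialized counts the
   nesting depth; the matching __kmpc_end_serialized_parallel() is expected to pop the
   buffer and decrement the count on the way out. */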
Jim Cownie181b4bb2013-12-23 17:28:57 +00001378
Jim Cownie5e8470a2013-09-27 10:38:44 +00001379/* most of the work for a fork */
1380/* return true if we really went parallel, false if serialized */
1381int
1382__kmp_fork_call(
1383 ident_t * loc,
1384 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001385 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001386 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001387#if OMPT_SUPPORT
1388 void *unwrapped_task,
1389#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001390 microtask_t microtask,
1391 launch_t invoker,
1392/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001393#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001394 va_list * ap
1395#else
1396 va_list ap
1397#endif
1398 )
1399{
1400 void **argv;
1401 int i;
1402 int master_tid;
1403 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001404 kmp_team_t *team;
1405 kmp_team_t *parent_team;
1406 kmp_info_t *master_th;
1407 kmp_root_t *root;
1408 int nthreads;
1409 int master_active;
1410 int master_set_numthreads;
1411 int level;
1412#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001413 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001414 int teams_level;
1415#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001416#if KMP_NESTED_HOT_TEAMS
1417 kmp_hot_team_ptr_t **p_hot_teams;
1418#endif
1419 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001420 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001421 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001422
1423 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001424 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
1425 /* Some systems prefer the stack for the root thread(s) to start with */
1426 /* some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001427 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001428 /* These 2 lines below are so this does not get optimized out */
1429 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1430 __kmp_stkpadding += (short)((kmp_int64)dummy);
1431 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001432
1433 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001434 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001435 if( ! TCR_4(__kmp_init_parallel) )
1436 __kmp_parallel_initialize();
1437
1438 /* setup current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001439 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1440 parent_team = master_th->th.th_team;
1441 master_tid = master_th->th.th_info.ds.ds_tid;
1442 master_this_cons = master_th->th.th_local.this_construct;
1443 root = master_th->th.th_root;
1444 master_active = root->r.r_active;
1445 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001446
1447#if OMPT_SUPPORT
1448 ompt_parallel_id_t ompt_parallel_id;
1449 ompt_task_id_t ompt_task_id;
1450 ompt_frame_t *ompt_frame;
1451 ompt_task_id_t my_task_id;
1452 ompt_parallel_id_t my_parallel_id;
1453
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001454 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001455 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1456 ompt_task_id = __ompt_get_task_id_internal(0);
1457 ompt_frame = __ompt_get_task_frame_internal(0);
1458 }
1459#endif
1460
Jim Cownie5e8470a2013-09-27 10:38:44 +00001461 // Nested level will be an index in the nested nthreads array
1462 level = parent_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001463        active_level = parent_team->t.t_active_level; // used to launch non-serialized teams even if nesting is not allowed
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001464#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001465 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1466#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001467#if KMP_NESTED_HOT_TEAMS
1468 p_hot_teams = &master_th->th.th_hot_teams;
1469 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1470 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1471 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1472 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1473 (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
1474 }
1475#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001476
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001477#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001478 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001479 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1480 int team_size = master_set_numthreads;
1481
1482 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1483 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001484 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001485 }
1486#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001487
Jim Cownie5e8470a2013-09-27 10:38:44 +00001488 master_th->th.th_ident = loc;
1489
1490#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001491 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
1493 // AC: This is start of parallel that is nested inside teams construct.
1494 // The team is actual (hot), all workers are ready at the fork barrier.
1495 // No lock needed to initialize the team a bit, then free workers.
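        // Illustrative note (not from the original source): this branch handles a parallel
        // construct nested directly inside a teams construct (level == teams_level), e.g. the
        // inner "#pragma omp parallel" of a "#pragma omp target teams" region executing on the
        // host; the already-forked hot team is re-used instead of allocating a new one.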
1496 parent_team->t.t_ident = loc;
Jonathan Peyton7cf08d42016-06-16 18:47:38 +00001497 __kmp_alloc_argv_entries( argc, parent_team, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001498 parent_team->t.t_argc = argc;
1499 argv = (void**)parent_team->t.t_argv;
1500 for( i=argc-1; i >= 0; --i )
1501/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001502#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001503 *argv++ = va_arg( *ap, void * );
1504#else
1505 *argv++ = va_arg( ap, void * );
1506#endif
1507        /* Increment our nested depth level, but do not increase the serialization count */
1508 if ( parent_team == master_th->th.th_serial_team ) {
1509 // AC: we are in serialized parallel
1510 __kmpc_serialized_parallel(loc, gtid);
1511 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
1512            parent_team->t.t_serialized--; // AC: need this so that the enquiry functions
1513                                           //     work correctly; will be restored at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001514
1515#if OMPT_SUPPORT
1516 void *dummy;
1517 void **exit_runtime_p;
1518
1519 ompt_lw_taskteam_t lw_taskteam;
1520
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001521 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001522 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1523 unwrapped_task, ompt_parallel_id);
1524 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1525 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1526
1527 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1528
1529#if OMPT_TRACE
1530 /* OMPT implicit task begin */
1531 my_task_id = lw_taskteam.ompt_task_info.task_id;
1532 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001533 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001534 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1535 my_parallel_id, my_task_id);
1536 }
1537#endif
1538
1539 /* OMPT state */
1540 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1541 } else {
1542 exit_runtime_p = &dummy;
1543 }
1544#endif
1545
Jonathan Peyton45be4502015-08-11 21:36:41 +00001546 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001547 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1548 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001549 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001550#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001551 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001552#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001553 );
1554 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001555
1556#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001557 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001558 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001559#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001560 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001561
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001562 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001563 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1564 ompt_parallel_id, ompt_task_id);
1565 }
1566
1567 __ompt_lw_taskteam_unlink(master_th);
1568            // reset/clear the task id only after unlinking the task
1569 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1570#endif
1571
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001572 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001573 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001574 ompt_parallel_id, ompt_task_id,
1575 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001576 }
1577 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1578 }
1579#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001580 return TRUE;
1581 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001582
Jim Cownie5e8470a2013-09-27 10:38:44 +00001583 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001584#if OMPT_SUPPORT
1585 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1586#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001587 parent_team->t.t_invoke = invoker;
1588 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1589 parent_team->t.t_active_level ++;
1590 parent_team->t.t_level ++;
1591
1592 /* Change number of threads in the team if requested */
1593 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001594 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001595            // AC: can only reduce the number of threads dynamically, cannot increase it
1596 kmp_info_t **other_threads = parent_team->t.t_threads;
1597 parent_team->t.t_nproc = master_set_numthreads;
1598 for ( i = 0; i < master_set_numthreads; ++i ) {
1599 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1600 }
1601 // Keep extra threads hot in the team for possible next parallels
1602 }
1603 master_th->th.th_set_nproc = 0;
1604 }
1605
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001606#if USE_DEBUGGER
1607 if ( __kmp_debugging ) { // Let debugger override number of threads.
1608 int nth = __kmp_omp_num_threads( loc );
1609 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1610 master_set_numthreads = nth;
1611 }; // if
1612 }; // if
1613#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001614
1615 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1616 __kmp_internal_fork( loc, gtid, parent_team );
1617 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1618
1619 /* Invoke microtask for MASTER thread */
1620 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1621 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1622
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001623 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001624 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1625 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001626 if (! parent_team->t.t_invoke( gtid )) {
1627 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1628 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001629 }
1630 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1631 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1632 KMP_MB(); /* Flush all pending memory write invalidates. */
1633
1634 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1635
1636 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001637 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001638#endif /* OMP_40_ENABLED */
1639
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001640#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001641 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001642 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001644#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645
Jim Cownie5e8470a2013-09-27 10:38:44 +00001646 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
1647 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001648 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001649#if OMP_40_ENABLED
1650 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1651#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001652 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001653 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001654
1655        // Check whether we need to take the forkjoin lock (not needed for a serialized parallel outside of a teams construct).
1656        // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
1657 if (nthreads > 1) {
1658 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1659#if OMP_40_ENABLED
1660 && !enter_teams
1661#endif /* OMP_40_ENABLED */
1662 ) ) || ( __kmp_library == library_serial ) ) {
1663 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1664 gtid, nthreads ));
1665 nthreads = 1;
1666 }
1667 }
1668 if ( nthreads > 1 ) {
1669 /* determine how many new threads we can use */
1670 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1671
1672 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001673#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001674/* AC: If we execute teams from a parallel region (on the host), then the teams should be created,
1675   but each can have only 1 thread if nesting is disabled. If teams is called from a serial region,
1676   then the teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001677 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001678#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001679 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001680 if ( nthreads == 1 ) {
1681 // Free lock for single thread execution here;
1682 // for multi-thread execution it will be freed later
1683 // after team of threads created and initialized
1684 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1685 }
1686 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001687 }
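    // Note (summary, not from the original source): nthreads == 1 at this point means the region
    // will execute serialized, either because max-active-levels was reached, nesting is disabled
    // while already inside an active parallel region (and this is not a teams entry), the library
    // mode is serial (e.g. KMP_LIBRARY=serial), or __kmp_reserve_threads() granted only one thread.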
1688 KMP_DEBUG_ASSERT( nthreads > 0 );
1689
1690 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001691 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001692
Jim Cownie5e8470a2013-09-27 10:38:44 +00001693 /* create a serialized parallel region? */
1694 if ( nthreads == 1 ) {
1695 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001696#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001697 void * args[ argc ];
1698#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001699 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001700#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001701
Jim Cownie5e8470a2013-09-27 10:38:44 +00001702 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1703
1704 __kmpc_serialized_parallel(loc, gtid);
1705
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001706 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001708 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709#if OMP_40_ENABLED
1710 if ( !ap ) {
1711 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001712 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001713 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001714
1715#if OMPT_SUPPORT
1716 void *dummy;
1717 void **exit_runtime_p;
1718
1719 ompt_lw_taskteam_t lw_taskteam;
1720
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001721 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001722 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1723 unwrapped_task, ompt_parallel_id);
1724 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1725 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1726
1727 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1728
1729#if OMPT_TRACE
1730 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001731 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001732 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1733 ompt_parallel_id, my_task_id);
1734 }
1735#endif
1736
1737 /* OMPT state */
1738 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1739 } else {
1740 exit_runtime_p = &dummy;
1741 }
1742#endif
1743
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001744 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001745 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1746 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001747 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1748#if OMPT_SUPPORT
1749 , exit_runtime_p
1750#endif
1751 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001752 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001753
1754#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001755 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001756 if (ompt_enabled) {
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001757 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001758
1759#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001760 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001761 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1762 ompt_parallel_id, ompt_task_id);
1763 }
1764#endif
1765
1766 __ompt_lw_taskteam_unlink(master_th);
1767            // reset/clear the task id only after unlinking the task
1768 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1769
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001770 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001771 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001772 ompt_parallel_id, ompt_task_id,
1773 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001774 }
1775 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1776 }
1777#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001778 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
1779 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1780 team = master_th->th.th_team;
1781 //team->t.t_pkfn = microtask;
1782 team->t.t_invoke = invoker;
1783 __kmp_alloc_argv_entries( argc, team, TRUE );
1784 team->t.t_argc = argc;
1785 argv = (void**) team->t.t_argv;
1786 if ( ap ) {
1787 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001788// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001789# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001790 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001791# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001792 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001793# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001794 } else {
1795 for( i=0; i < argc; ++i )
1796 // Get args from parent team for teams construct
1797 argv[i] = parent_team->t.t_argv[i];
1798 }
1799 // AC: revert change made in __kmpc_serialized_parallel()
1800 // because initial code in teams should have level=0
1801 team->t.t_level--;
1802 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001803 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001804 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1805 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001806 invoker(gtid);
1807 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001808 } else {
1809#endif /* OMP_40_ENABLED */
1810 argv = args;
1811 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001812// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001813#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001814 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001815#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001816 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001817#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001818 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001819
1820#if OMPT_SUPPORT
1821 void *dummy;
1822 void **exit_runtime_p;
1823
1824 ompt_lw_taskteam_t lw_taskteam;
1825
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001826 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001827 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1828 unwrapped_task, ompt_parallel_id);
1829 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1830 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1831
1832 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1833
1834#if OMPT_TRACE
1835 /* OMPT implicit task begin */
1836 my_task_id = lw_taskteam.ompt_task_info.task_id;
1837 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001838 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001839 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1840 my_parallel_id, my_task_id);
1841 }
1842#endif
1843
1844 /* OMPT state */
1845 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1846 } else {
1847 exit_runtime_p = &dummy;
1848 }
1849#endif
1850
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001851 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001852 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1853 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001854 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1855#if OMPT_SUPPORT
1856 , exit_runtime_p
1857#endif
1858 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001859 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001860
1861#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001862 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001863 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001864#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001865 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001866
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001867 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001868 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1869 my_parallel_id, my_task_id);
1870 }
1871#endif
1872
1873 __ompt_lw_taskteam_unlink(master_th);
1874            // reset/clear the task id only after unlinking the task
1875 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1876
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001877 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001878 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001879 ompt_parallel_id, ompt_task_id,
1880 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001881 }
1882 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1883 }
1884#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001885#if OMP_40_ENABLED
1886 }
1887#endif /* OMP_40_ENABLED */
1888 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001889 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001890#if OMPT_SUPPORT
1891 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1892 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1893 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1894 unwrapped_task, ompt_parallel_id);
1895
1896 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001897 lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001898 __ompt_lw_taskteam_link(lwt, master_th);
1899#endif
1900
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001901 // we were called from GNU native code
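        // (Note, not from the original source: returning FALSE reports the region as serialized,
        // so the GNU-compatible entry point is expected to invoke the outlined function itself
        // in the current thread.)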
1902 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1903 return FALSE;
1904 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001905 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001906 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907 }
1908
Jim Cownie5e8470a2013-09-27 10:38:44 +00001909
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001910 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 KMP_MB();
1912 return FALSE;
1913 }
1914
Jim Cownie5e8470a2013-09-27 10:38:44 +00001915 // GEH: only modify the executing flag in the case when not serialized
1916 // serialized case is handled in kmpc_serialized_parallel
1917 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001918 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1919 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001920 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1921 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1922 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001923
1924#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001925 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001926#endif /* OMP_40_ENABLED */
1927 {
1928 /* Increment our nested depth level */
1929 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1930 }
1931
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001934 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1935 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001936 }
1937 else {
1938 nthreads_icv = 0; // don't update
1939 }
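    // Note (illustrative, not from the original source): __kmp_nested_nth is typically populated
    // from a comma-separated OMP_NUM_THREADS list, e.g. OMP_NUM_THREADS=8,2 gives nth[0]=8 and
    // nth[1]=2, so nth[level+1] is the thread count requested for this new nesting level.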
1940
1941#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001942 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001944 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001945 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1946 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001947 }
1948 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001949 if (proc_bind == proc_bind_default) {
1950 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1951 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001952 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001953 /* else: The proc_bind policy was specified explicitly on parallel clause. This
1954 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001956 if ((level+1 < __kmp_nested_proc_bind.used)
1957 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1958 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001959 }
1960 }
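        // Note (illustrative, not from the original source): __kmp_nested_proc_bind is normally
        // filled from an OMP_PROC_BIND list, e.g. OMP_PROC_BIND=spread,close, so
        // bind_types[level+1] is the proc-bind-var that the children of this new team inherit.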
1961
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001963 master_th->th.th_set_proc_bind = proc_bind_default;
1964#endif /* OMP_40_ENABLED */
1965
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001966 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001968 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001972 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001973 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001974 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001975 new_icvs.nproc = nthreads_icv;
1976 }
1977
1978#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001979 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001980 new_icvs.proc_bind = proc_bind_icv;
1981 }
1982#endif /* OMP_40_ENABLED */
1983
1984 /* allocate a new parallel team */
1985 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1986 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001987#if OMPT_SUPPORT
1988 ompt_parallel_id,
1989#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001990#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001991 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001993 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1994 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995 /* allocate a new parallel team */
1996 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1997 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001998#if OMPT_SUPPORT
1999 ompt_parallel_id,
2000#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002001#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002002 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002003#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002004 &master_th->th.th_current_task->td_icvs, argc
2005 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002007 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008
2009 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002010 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2011 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2012 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2013 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2014 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002015#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002016 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002017#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002018 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002019 // TODO: parent_team->t.t_level == INT_MAX ???
2020#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002021 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002023 int new_level = parent_team->t.t_level + 1;
2024 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2025 new_level = parent_team->t.t_active_level + 1;
2026 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027#if OMP_40_ENABLED
2028 } else {
2029 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002030 int new_level = parent_team->t.t_level;
2031 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2032 new_level = parent_team->t.t_active_level;
2033 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002034 }
2035#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002036 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton6b560f02016-07-01 17:54:32 +00002037 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002038 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002039
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002040#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002041 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002042#endif
2043
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002044 // Update the floating point rounding in the team if required.
2045 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046
2047 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002048        // Set the master's task team to the team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002049#if 0
2050 // Patch out an assertion that trips while the runtime seems to operate correctly.
2051 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002052 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002053#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002054 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002055 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002056 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002057
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002058 if ( active_level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002059 // Take a memo of master's task_state
2060 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2061 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002062 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2063 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002064 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002065 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002066 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2067 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2068 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002069 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2070 new_stack[i] = 0;
2071 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002072 old_stack = master_th->th.th_task_state_memo_stack;
2073 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002074 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002075 __kmp_free(old_stack);
2076 }
2077 // Store master's task_state on stack
2078 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2079 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002080#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002081 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
Jonathan Peyton54127982015-11-04 21:37:48 +00002082 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2083 }
2084 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002085#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002086 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002087#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002088 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002089#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002090 }
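        // Note (not from the original source): the memo stack above saves the master's
        // th_task_state once per nested level, doubling its size on overflow; the saved value is
        // popped back and restored in __kmp_join_call when this nested team finishes.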
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002091#if !KMP_NESTED_HOT_TEAMS
2092 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2093#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002094 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002095
2096 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2097 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2098 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2099 ( team->t.t_master_tid == 0 &&
2100 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2101 KMP_MB();
2102
2103 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002104 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002105#if OMP_40_ENABLED
2106 if ( ap ) {
2107#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002108 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002109// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002110#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002111 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002112#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002113 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002115 KMP_CHECK_UPDATE(*argv, new_argv);
2116 argv++;
2117 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002118#if OMP_40_ENABLED
2119 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002120 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002121 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002122 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2123 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002124 }
2125#endif /* OMP_40_ENABLED */
2126
2127 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002128 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002129 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2130 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002131
2132 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002133 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002135#if OMPT_SUPPORT
2136 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2137#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002138
2139 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2140
Jim Cownie5e8470a2013-09-27 10:38:44 +00002141#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002142 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002143# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002144 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002145# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002146 ) {
2147#if USE_ITT_NOTIFY
2148 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2149 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002150 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002151 kmp_uint64 tmp_time = 0;
2152 if ( __itt_get_timestamp_ptr )
2153 tmp_time = __itt_get_timestamp();
2154 // Internal fork - report frame begin
2155 master_th->th.th_frame_time = tmp_time;
2156 if ( __kmp_forkjoin_frames_mode == 3 )
2157 team->t.t_region_time = tmp_time;
2158 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2159#endif /* USE_ITT_NOTIFY */
2160 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2161 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2162 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002163 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2164 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002165 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002166#endif /* USE_ITT_BUILD */
2167
2168 /* now go on and do the work */
2169 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2170 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002171 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2172 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002173
2174#if USE_ITT_BUILD
2175 if ( __itt_stack_caller_create_ptr ) {
2176 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2177 }
2178#endif /* USE_ITT_BUILD */
2179
2180#if OMP_40_ENABLED
2181 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
2182#endif /* OMP_40_ENABLED */
2183 {
2184 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002185 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2186 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002187 }
2188
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002189 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002190 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2191 return TRUE;
2192 }
2193
2194 /* Invoke microtask for MASTER thread */
2195 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2196 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002197 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002198
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002199 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002200 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2201 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002202 if (! team->t.t_invoke( gtid )) {
2203 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2204 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002205 }
2206 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2207 gtid, team->t.t_id, team->t.t_pkfn ) );
2208 KMP_MB(); /* Flush all pending memory write invalidates. */
2209
2210 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2211
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002212#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002213 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002214 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2215 }
2216#endif
2217
Jim Cownie5e8470a2013-09-27 10:38:44 +00002218 return TRUE;
2219}
2220
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002221#if OMPT_SUPPORT
2222static inline void
2223__kmp_join_restore_state(
2224 kmp_info_t *thread,
2225 kmp_team_t *team)
2226{
2227 // restore state outside the region
2228 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2229 ompt_state_work_serial : ompt_state_work_parallel);
2230}
2231
2232static inline void
2233__kmp_join_ompt(
2234 kmp_info_t *thread,
2235 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002236 ompt_parallel_id_t parallel_id,
2237 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002238{
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002239 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002240 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002241 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002242 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002243 }
2244
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002245 task_info->frame.reenter_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002246 __kmp_join_restore_state(thread,team);
2247}
2248#endif
2249
Jim Cownie5e8470a2013-09-27 10:38:44 +00002250void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002251__kmp_join_call(ident_t *loc, int gtid
2252#if OMPT_SUPPORT
2253 , enum fork_context_e fork_context
2254#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002255#if OMP_40_ENABLED
2256 , int exit_teams
2257#endif /* OMP_40_ENABLED */
2258)
2259{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00002260 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002261 kmp_team_t *team;
2262 kmp_team_t *parent_team;
2263 kmp_info_t *master_th;
2264 kmp_root_t *root;
2265 int master_active;
2266 int i;
2267
2268 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2269
2270 /* setup current data */
2271 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002272 root = master_th->th.th_root;
2273 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002274 parent_team = team->t.t_parent;
2275
2276 master_th->th.th_ident = loc;
2277
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002278#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002279 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002280 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2281 }
2282#endif
2283
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002284#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00002285 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2286 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2287 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002288 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2289 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002290 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002291#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292
2293 if( team->t.t_serialized ) {
2294#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002295 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002296 // We are in teams construct
2297 int level = team->t.t_level;
2298 int tlevel = master_th->th.th_teams_level;
2299 if ( level == tlevel ) {
2300 // AC: we haven't incremented it earlier at start of teams construct,
2301 // so do it here - at the end of teams construct
2302 team->t.t_level++;
2303 } else if ( level == tlevel + 1 ) {
2304 // AC: we are exiting parallel inside teams, need to increment serialization
2305 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2306 team->t.t_serialized++;
2307 }
2308 }
2309#endif /* OMP_40_ENABLED */
2310 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002311
2312#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002313 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002314 __kmp_join_restore_state(master_th, parent_team);
2315 }
2316#endif
2317
Jim Cownie5e8470a2013-09-27 10:38:44 +00002318 return;
2319 }
2320
2321 master_active = team->t.t_master_active;
2322
2323#if OMP_40_ENABLED
2324 if (!exit_teams)
2325#endif /* OMP_40_ENABLED */
2326 {
2327 // AC: No barrier for internal teams at exit from teams construct.
2328 // But there is barrier for external team (league).
2329 __kmp_internal_join( loc, gtid, team );
2330 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002331#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002332 else {
2333 master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
2334 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002335#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002336
Jim Cownie5e8470a2013-09-27 10:38:44 +00002337 KMP_MB();
2338
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002339#if OMPT_SUPPORT
2340 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2341#endif
2342
Jim Cownie5e8470a2013-09-27 10:38:44 +00002343#if USE_ITT_BUILD
2344 if ( __itt_stack_caller_create_ptr ) {
2345 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2346 }
2347
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002348 // Mark end of "parallel" region for VTune.
2349 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002350# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002351 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002353 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002354 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002355 // only one notification scheme (either "submit" or "forking/joined", not both)
2356 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2357 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2358 0, loc, master_th->th.th_team_nproc, 1 );
2359 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2360 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2361 __kmp_itt_region_joined( gtid );
2362 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002363#endif /* USE_ITT_BUILD */
2364
2365#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002366 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002367 !exit_teams &&
2368 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2369 team->t.t_level == master_th->th.th_teams_level + 1 ) {
2370 // AC: We need to leave the team structure intact at the end
2371 // of parallel inside the teams construct, so that at the next
2372 // parallel same (hot) team works, only adjust nesting levels
2373
2374 /* Decrement our nested depth level */
2375 team->t.t_level --;
2376 team->t.t_active_level --;
2377 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2378
2379 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002380 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002381 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002382 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002383 kmp_info_t **other_threads = team->t.t_threads;
2384 team->t.t_nproc = new_num;
2385 for ( i = 0; i < old_num; ++i ) {
2386 other_threads[i]->th.th_team_nproc = new_num;
2387 }
2388 // Adjust states of non-used threads of the team
2389 for ( i = old_num; i < new_num; ++i ) {
2390 // Re-initialize thread's barrier data.
2391 int b;
2392 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002393 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002394 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002395 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002396#if USE_DEBUGGER
2397 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2398#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002399 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002400 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2401 // Synchronize thread's task state
2402 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2403 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002404 }
2405 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002406
2407#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002408 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002409 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002410 }
2411#endif
2412
Jim Cownie5e8470a2013-09-27 10:38:44 +00002413 return;
2414 }
2415#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002416
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002417 /* do cleanup and restore the parent team */
2418    master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2419 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2420
2421 master_th->th.th_dispatch =
2422 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002423
2424 /* jc: The following lock has instructions with REL and ACQ semantics,
2425 separating the parallel user code called in this parallel region
2426 from the serial user code called after this function returns.
2427 */
2428 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2429
2430#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002431 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002432#endif /* OMP_40_ENABLED */
2433 {
2434 /* Decrement our nested depth level */
2435 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2436 }
2437 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2438
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002439#if OMPT_SUPPORT && OMPT_TRACE
2440 if(ompt_enabled){
2441 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2442 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2443 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2444 parallel_id, task_info->task_id);
2445 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00002446 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002447 task_info->task_id = 0;
2448 }
2449#endif
2450
Jim Cownie5e8470a2013-09-27 10:38:44 +00002451 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2452 0, master_th, team ) );
2453 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002454
Alp Toker98758b02014-03-02 04:12:06 +00002455#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002456 //
2457 // Restore master thread's partition.
2458 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002459 master_th->th.th_first_place = team->t.t_first_place;
2460 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002461#endif /* OMP_40_ENABLED */
2462
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002463 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002464
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002465 if ( root->r.r_active != master_active )
2466 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002467
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002468 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002469
2470 /* this race was fun to find. make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002471 * region otherwise assertions may fail occasionally since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002472 * may be reallocated and the hierarchy appears inconsistent. it is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002473 * actually safe to run and won't cause any bugs, but will cause those
Jim Cownie5e8470a2013-09-27 10:38:44 +00002474 * assertion failures. it's only one deref&assign so might as well put this
2475 * in the critical region */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002476 master_th->th.th_team = parent_team;
2477 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2478 master_th->th.th_team_master = parent_team->t.t_threads[0];
2479 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002480
2481 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002482 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002483 parent_team != master_th->th.th_serial_team &&
2484 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002485 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2486 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002487 }
2488
Jim Cownie5e8470a2013-09-27 10:38:44 +00002489 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002490 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2491 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2492 // Remember master's state if we re-use this nested hot team
2493 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002494 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002495 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002496 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002497 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002498 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002499 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002500 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002501 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002502 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002503
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002504 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2505 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2506 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002507
2508 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2509
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002510#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002511 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002512 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002513 }
2514#endif
2515
Jim Cownie5e8470a2013-09-27 10:38:44 +00002516 KMP_MB();
2517 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2518}
2519
2520/* ------------------------------------------------------------------------ */
2521/* ------------------------------------------------------------------------ */
2522
2523/* Check whether we should push an internal control record onto the
2524 serial team stack. If so, do it. */
2525void
2526__kmp_save_internal_controls ( kmp_info_t * thread )
2527{
2528
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002529 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002530 return;
2531 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002532 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002533 int push = 0;
2534
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002535 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002536 push = 1;
2537 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002538 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2539 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002540 push = 1;
2541 }
2542 }
2543 if (push) { /* push a record on the serial team's stack */
2544 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2545
Jim Cownie5e8470a2013-09-27 10:38:44 +00002546 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002547
2548 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2549
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002550 control->next = thread->th.th_team->t.t_control_stack_top;
2551 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002552 }
2553 }
2554}
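/* Illustrative sketch (assumes num_threads(1) regions are serialized by this
 * runtime, the usual path into __kmpc_serialized_parallel): a record is pushed
 * only when the thread is running on its serial team with a serialized nesting
 * depth greater than one, e.g.
 *
 *   omp_set_nested(1);
 *   #pragma omp parallel num_threads(1)        // serialized, t_serialized == 1, no push yet
 *   {
 *       #pragma omp parallel num_threads(1)    // nested serialized, t_serialized == 2
 *       {
 *           omp_set_num_threads(4);            // ICV change -> record pushed here
 *       }
 *   }
 */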
2555
2556/* Changes set_nproc */
2557void
2558__kmp_set_num_threads( int new_nth, int gtid )
2559{
2560 kmp_info_t *thread;
2561 kmp_root_t *root;
2562
2563 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2564 KMP_DEBUG_ASSERT( __kmp_init_serial );
2565
2566 if (new_nth < 1)
2567 new_nth = 1;
2568 else if (new_nth > __kmp_max_nth)
2569 new_nth = __kmp_max_nth;
2570
Jonathan Peyton45be4502015-08-11 21:36:41 +00002571 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002572 thread = __kmp_threads[gtid];
2573
2574 __kmp_save_internal_controls( thread );
2575
2576 set__nproc( thread, new_nth );
2577
2578 //
2579 // If this omp_set_num_threads() call will cause the hot team size to be
2580 // reduced (in the absence of a num_threads clause), then reduce it now,
2581 // rather than waiting for the next parallel region.
2582 //
2583 root = thread->th.th_root;
2584 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002585 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2586#if KMP_NESTED_HOT_TEAMS
2587 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2588#endif
2589 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002590 kmp_team_t *hot_team = root->r.r_hot_team;
2591 int f;
2592
2593 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2594
Jim Cownie5e8470a2013-09-27 10:38:44 +00002595 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002596 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2597 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002598 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2599 // When decreasing team size, threads no longer in the team should unref task team.
2600 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2601 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002602 __kmp_free_thread( hot_team->t.t_threads[f] );
2603 hot_team->t.t_threads[f] = NULL;
2604 }
2605 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002606#if KMP_NESTED_HOT_TEAMS
2607 if( thread->th.th_hot_teams ) {
2608 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2609 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2610 }
2611#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002612
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2614
2615 //
2616 // Update the t_nproc field in the threads that are still active.
2617 //
2618 for( f=0 ; f < new_nth; f++ ) {
2619 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2620 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2621 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002622 // Special flag in case omp_set_num_threads() call
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002623 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002624 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002625}
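/* Illustrative usage sketch (assuming this is the worker behind the
 * omp_set_num_threads() entry point): the eager hot-team shrink above lets the
 * new value take effect between regions instead of waiting for the next fork, e.g.
 *
 *   omp_set_num_threads(8);
 *   #pragma omp parallel        // hot team grows to 8 threads
 *   { }
 *   omp_set_num_threads(2);     // root inactive: surplus hot-team workers released here
 *   #pragma omp parallel        // next region reuses the smaller hot team
 *   { }
 */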
2626
Jim Cownie5e8470a2013-09-27 10:38:44 +00002627/* Changes max_active_levels */
2628void
2629__kmp_set_max_active_levels( int gtid, int max_active_levels )
2630{
2631 kmp_info_t *thread;
2632
2633 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2634 KMP_DEBUG_ASSERT( __kmp_init_serial );
2635
2636 // validate max_active_levels
2637 if( max_active_levels < 0 ) {
2638 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2639 // We ignore this call if the user has specified a negative value.
2640 // The current setting won't be changed. The last valid setting will be used.
2641 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2642 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2643 return;
2644 }
2645 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2646 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2647 // We allow a zero value. (implementation defined behavior)
2648 } else {
2649 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2650 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2651 // Current upper limit is MAX_INT. (implementation defined behavior)
2652 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
2653        // Actually, the flow should never get here as long as we use the MAX_INT limit.
2654 }
2655 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2656
2657 thread = __kmp_threads[ gtid ];
2658
2659 __kmp_save_internal_controls( thread );
2660
2661 set__max_active_levels( thread, max_active_levels );
2662
2663}
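/* Illustrative note (assuming this backs omp_set_max_active_levels()): requests are
 * clamped to [0, KMP_MAX_ACTIVE_LEVELS_LIMIT] and negative values are ignored with a
 * warning, so omp_set_max_active_levels(-3) leaves the previous setting untouched,
 * while omp_set_max_active_levels(0) is accepted ("no active parallel levels").
 */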
2664
2665/* Gets max_active_levels */
2666int
2667__kmp_get_max_active_levels( int gtid )
2668{
2669 kmp_info_t *thread;
2670
2671 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2672 KMP_DEBUG_ASSERT( __kmp_init_serial );
2673
2674 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002675 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002676 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002677 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2678 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002679}
2680
2681/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2682void
2683__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2684{
2685 kmp_info_t *thread;
2686// kmp_team_t *team;
2687
2688 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2689 KMP_DEBUG_ASSERT( __kmp_init_serial );
2690
2691 // Check if the kind parameter is valid, correct if needed.
2692 // Valid parameters should fit in one of two intervals - standard or extended:
2693 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2694 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2695 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2696 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2697 {
2698 // TODO: Hint needs attention in case we change the default schedule.
2699 __kmp_msg(
2700 kmp_ms_warning,
2701 KMP_MSG( ScheduleKindOutOfRange, kind ),
2702 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2703 __kmp_msg_null
2704 );
2705 kind = kmp_sched_default;
2706 chunk = 0; // ignore chunk value in case of bad kind
2707 }
2708
2709 thread = __kmp_threads[ gtid ];
2710
2711 __kmp_save_internal_controls( thread );
2712
2713 if ( kind < kmp_sched_upper_std ) {
2714 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2715            // differentiate static chunked vs. unchunked:
2716 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002717 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002718 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002719 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720 }
2721 } else {
2722 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002723 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002724 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2725 }
2726 if ( kind == kmp_sched_auto ) {
2727 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002728 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002729 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002730 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731 }
2732}
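/* Illustrative mapping sketch (assuming this is the worker behind omp_set_schedule()):
 *
 *   __kmp_set_schedule(gtid, kmp_sched_static, 0);   // chunk < KMP_DEFAULT_CHUNK -> plain kmp_sch_static (unchunked)
 *   __kmp_set_schedule(gtid, kmp_sched_static, 4);   // chunk >= KMP_DEFAULT_CHUNK (assumed) -> chunked static via __kmp_sch_map, chunk ICV = 4
 *   __kmp_set_schedule(gtid, kmp_sched_auto,  16);   // chunk argument ignored; chunk ICV reset to KMP_DEFAULT_CHUNK
 */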
2733
2734/* Gets def_sched_var ICV values */
2735void
2736__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2737{
2738 kmp_info_t *thread;
2739 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002740
2741 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2742 KMP_DEBUG_ASSERT( __kmp_init_serial );
2743
2744 thread = __kmp_threads[ gtid ];
2745
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002746 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002747
2748 switch ( th_type ) {
2749 case kmp_sch_static:
2750 case kmp_sch_static_greedy:
2751 case kmp_sch_static_balanced:
2752 *kind = kmp_sched_static;
2753 *chunk = 0; // chunk was not set, try to show this fact via zero value
2754 return;
2755 case kmp_sch_static_chunked:
2756 *kind = kmp_sched_static;
2757 break;
2758 case kmp_sch_dynamic_chunked:
2759 *kind = kmp_sched_dynamic;
2760 break;
2761 case kmp_sch_guided_chunked:
2762 case kmp_sch_guided_iterative_chunked:
2763 case kmp_sch_guided_analytical_chunked:
2764 *kind = kmp_sched_guided;
2765 break;
2766 case kmp_sch_auto:
2767 *kind = kmp_sched_auto;
2768 break;
2769 case kmp_sch_trapezoidal:
2770 *kind = kmp_sched_trapezoidal;
2771 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002772#if KMP_STATIC_STEAL_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002773 case kmp_sch_static_steal:
2774 *kind = kmp_sched_static_steal;
2775 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002776#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002777 default:
2778 KMP_FATAL( UnknownSchedulingType, th_type );
2779 }
2780
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002781 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002782}
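/* Illustrative note: the reverse mapping above folds the internal static variants
 * (greedy/balanced) back into kmp_sched_static with chunk 0, so a set/get round trip
 * may report a coarser kind than the schedule actually installed internally.
 */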
2783
2784int
2785__kmp_get_ancestor_thread_num( int gtid, int level ) {
2786
2787 int ii, dd;
2788 kmp_team_t *team;
2789 kmp_info_t *thr;
2790
2791 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2792 KMP_DEBUG_ASSERT( __kmp_init_serial );
2793
2794 // validate level
2795 if( level == 0 ) return 0;
2796 if( level < 0 ) return -1;
2797 thr = __kmp_threads[ gtid ];
2798 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002799 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002800 if( level > ii ) return -1;
2801
2802#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002803 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002804 // AC: we are in teams region where multiple nested teams have same level
2805 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2806 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2807 KMP_DEBUG_ASSERT( ii >= tlevel );
2808 // AC: As we need to pass by the teams league, we need to artificially increase ii
2809 if ( ii == tlevel ) {
2810 ii += 2; // three teams have same level
2811 } else {
2812 ii ++; // two teams have same level
2813 }
2814 }
2815 }
2816#endif
2817
2818 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2819
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002820 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002821 level++;
2822 while( ii > level )
2823 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002824 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002825 {
2826 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002827 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002828 team = team->t.t_parent;
2829 continue;
2830 }
2831 if( ii > level ) {
2832 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002833 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002834 ii--;
2835 }
2836 }
2837
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002838 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002839}
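/* Illustrative usage sketch (assuming this implements omp_get_ancestor_thread_num()),
 * with two nesting levels:
 *
 *   #pragma omp parallel num_threads(2)
 *   {
 *       #pragma omp parallel num_threads(2)
 *       {
 *           int outer = omp_get_ancestor_thread_num(1); // tid of the enclosing ancestor
 *           int here  = omp_get_ancestor_thread_num(2); // == omp_get_thread_num() at this level
 *           int root  = omp_get_ancestor_thread_num(0); // always 0
 *       }
 *   }
 */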
2840
2841int
2842__kmp_get_team_size( int gtid, int level ) {
2843
2844 int ii, dd;
2845 kmp_team_t *team;
2846 kmp_info_t *thr;
2847
2848 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2849 KMP_DEBUG_ASSERT( __kmp_init_serial );
2850
2851 // validate level
2852 if( level == 0 ) return 1;
2853 if( level < 0 ) return -1;
2854 thr = __kmp_threads[ gtid ];
2855 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002856 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002857 if( level > ii ) return -1;
2858
2859#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002860 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002861 // AC: we are in teams region where multiple nested teams have same level
2862 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2863 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2864 KMP_DEBUG_ASSERT( ii >= tlevel );
2865 // AC: As we need to pass by the teams league, we need to artificially increase ii
2866 if ( ii == tlevel ) {
2867 ii += 2; // three teams have same level
2868 } else {
2869 ii ++; // two teams have same level
2870 }
2871 }
2872 }
2873#endif
2874
2875 while( ii > level )
2876 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002877 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002878 {
2879 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002880 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002881 team = team->t.t_parent;
2882 continue;
2883 }
2884 if( ii > level ) {
2885 team = team->t.t_parent;
2886 ii--;
2887 }
2888 }
2889
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002890 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002891}
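/* Illustrative note (assuming this implements omp_get_team_size()): level 0 is the
 * implicit initial team of size 1, an out-of-range level returns -1, and otherwise
 * the t_nproc of the team at the requested nesting level is reported.
 */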
2892
Jim Cownie5e8470a2013-09-27 10:38:44 +00002893kmp_r_sched_t
2894__kmp_get_schedule_global() {
2895// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
2896// may be changed by kmp_set_defaults independently. So one can get the updated schedule here.
2897
2898 kmp_r_sched_t r_sched;
2899
2900 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2901 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2902 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2903 if ( __kmp_sched == kmp_sch_static ) {
2904 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2905 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2906 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2907 } else {
2908 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2909 }
2910
2911 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2912 r_sched.chunk = KMP_DEFAULT_CHUNK;
2913 } else {
2914 r_sched.chunk = __kmp_chunk;
2915 }
2916
2917 return r_sched;
2918}
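/* Illustrative note: with a generic "guided" runtime schedule requested (e.g. via
 * OMP_SCHEDULE or KMP_SCHEDULE; the exact spelling used is an assumption of this
 * note), __kmp_sched holds kmp_sch_guided_chunked and this routine substitutes the
 * more detailed __kmp_guided variant plus a valid chunk, so every root sees a fully
 * specified {sched, chunk} pair.
 */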
2919
2920/* ------------------------------------------------------------------------ */
2921/* ------------------------------------------------------------------------ */
2922
2923
2924/*
2925 * Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2926 * at least argc number of *t_argv entries for the requested team.
2927 */
2928static void
2929__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2930{
2931
2932 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002933 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002934
2935 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2936 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002937 /* if previously allocated heap space for args, free them */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002938 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2939 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002940
2941 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2942 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002943 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002944 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2945 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002946 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002947 if ( __kmp_storage_map ) {
2948 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2949 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2950 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2951 "team_%d.t_inline_argv",
2952 team->t.t_id );
2953 }
2954 } else {
2955 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002956 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002957 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2958 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2959 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002960 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002961 if ( __kmp_storage_map ) {
2962 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2963 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2964 team->t.t_id );
2965 }
2966 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002967 }
2968}
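/* Illustrative sizing note: the rule above is equivalent to
 *   t_max_argc = (argc <= KMP_INLINE_ARGV_ENTRIES) ? KMP_INLINE_ARGV_ENTRIES      // inline buffer
 *                                                  : max(KMP_MIN_MALLOC_ARGV_ENTRIES, 2 * argc);  // heap buffer
 * e.g. argc == 100 would allocate 200 heap entries, assuming 100 exceeds both
 * KMP_INLINE_ARGV_ENTRIES and half of KMP_MIN_MALLOC_ARGV_ENTRIES (values assumed).
 */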
2969
2970static void
2971__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2972{
2973 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002974 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002975 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2976 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002977 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002978 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002979 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002980 team->t.t_max_nproc = max_nth;
2981
2982 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002983 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002984 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002985#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00002986 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2987#endif
2988 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002989}
2990
2991static void
2992__kmp_free_team_arrays(kmp_team_t *team) {
2993 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2994 int i;
2995 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2996 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2997 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2998 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2999 }; // if
3000 }; // for
3001 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00003002 __kmp_free(team->t.t_disp_buffer);
3003 __kmp_free(team->t.t_dispatch);
3004 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003005 team->t.t_threads = NULL;
3006 team->t.t_disp_buffer = NULL;
3007 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003008 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003009}
3010
3011static void
3012__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3013 kmp_info_t **oldThreads = team->t.t_threads;
3014
Jonathan Peytona58563d2016-03-29 20:05:27 +00003015 __kmp_free(team->t.t_disp_buffer);
3016 __kmp_free(team->t.t_dispatch);
3017 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003018 __kmp_allocate_team_arrays(team, max_nth);
3019
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003020 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003021
3022 __kmp_free(oldThreads);
3023}
3024
3025static kmp_internal_control_t
3026__kmp_get_global_icvs( void ) {
3027
Jim Cownie5e8470a2013-09-27 10:38:44 +00003028 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003029
3030#if OMP_40_ENABLED
3031 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3032#endif /* OMP_40_ENABLED */
3033
3034 kmp_internal_control_t g_icvs = {
3035 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003036 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3037 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3038 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003039 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003040#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00003041 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003042#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003043 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3044 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003045 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3046 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003047#if OMP_40_ENABLED
3048 __kmp_nested_proc_bind.bind_types[0],
George Rokos28f31b42016-09-09 17:55:26 +00003049 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003050#endif /* OMP_40_ENABLED */
3051 NULL //struct kmp_internal_control *next;
3052 };
3053
3054 return g_icvs;
3055}
3056
3057static kmp_internal_control_t
3058__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3059
Jim Cownie5e8470a2013-09-27 10:38:44 +00003060 kmp_internal_control_t gx_icvs;
3061 gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
3062 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3063 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003064
3065 return gx_icvs;
3066}
3067
3068static void
3069__kmp_initialize_root( kmp_root_t *root )
3070{
3071 int f;
3072 kmp_team_t *root_team;
3073 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003074 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003075 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3076 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003077 KMP_DEBUG_ASSERT( root );
3078 KMP_ASSERT( ! root->r.r_begin );
3079
3080 /* setup the root state structure */
3081 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003082 root->r.r_begin = FALSE;
3083 root->r.r_active = FALSE;
3084 root->r.r_in_parallel = 0;
3085 root->r.r_blocktime = __kmp_dflt_blocktime;
3086 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003087
3088 /* setup the root team for this task */
3089 /* allocate the root team structure */
3090 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003091
Jim Cownie5e8470a2013-09-27 10:38:44 +00003092 root_team =
3093 __kmp_allocate_team(
3094 root,
3095 1, // new_nproc
3096 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003097#if OMPT_SUPPORT
3098 0, // root parallel id
3099#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003100#if OMP_40_ENABLED
3101 __kmp_nested_proc_bind.bind_types[0],
3102#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003103 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003104 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003105 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003106 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003107#if USE_DEBUGGER
3108 // Non-NULL value should be assigned to make the debugger display the root team.
3109 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3110#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003111
3112 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3113
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003114 root->r.r_root_team = root_team;
3115 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003116
3117 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003118 root_team->t.t_threads[0] = NULL;
3119 root_team->t.t_nproc = 1;
3120 root_team->t.t_serialized = 1;
3121 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3122 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3123 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003124 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3125 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3126
3127 /* setup the hot team for this task */
3128 /* allocate the hot team structure */
3129 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003130
Jim Cownie5e8470a2013-09-27 10:38:44 +00003131 hot_team =
3132 __kmp_allocate_team(
3133 root,
3134 1, // new_nproc
3135 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003136#if OMPT_SUPPORT
3137 0, // root parallel id
3138#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003139#if OMP_40_ENABLED
3140 __kmp_nested_proc_bind.bind_types[0],
3141#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003142 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003144 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003145 );
3146 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3147
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003148 root->r.r_hot_team = hot_team;
3149 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003150
3151 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003152 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003153
3154 /* initialize hot team */
3155 hot_team_max_nth = hot_team->t.t_max_nproc;
3156 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003157 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003158 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003159 hot_team->t.t_nproc = 1;
3160 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3161 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3162 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003163 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003164}
3165
3166#ifdef KMP_DEBUG
3167
3168
3169typedef struct kmp_team_list_item {
3170 kmp_team_p const * entry;
3171 struct kmp_team_list_item * next;
3172} kmp_team_list_item_t;
3173typedef kmp_team_list_item_t * kmp_team_list_t;
3174
3175
3176static void
3177__kmp_print_structure_team_accum( // Add team to list of teams.
3178 kmp_team_list_t list, // List of teams.
3179 kmp_team_p const * team // Team to add.
3180) {
3181
3182 // List must terminate with item where both entry and next are NULL.
3183 // Team is added to the list only once.
3184 // List is sorted in ascending order by team id.
3185 // Team id is *not* a key.
3186
3187 kmp_team_list_t l;
3188
3189 KMP_DEBUG_ASSERT( list != NULL );
3190 if ( team == NULL ) {
3191 return;
3192 }; // if
3193
3194 __kmp_print_structure_team_accum( list, team->t.t_parent );
3195 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3196
3197 // Search list for the team.
3198 l = list;
3199 while ( l->next != NULL && l->entry != team ) {
3200 l = l->next;
3201 }; // while
3202 if ( l->next != NULL ) {
3203 return; // Team has been added before, exit.
3204 }; // if
3205
3206 // Team is not found. Search list again for insertion point.
3207 l = list;
3208 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3209 l = l->next;
3210 }; // while
3211
3212 // Insert team.
3213 {
3214 kmp_team_list_item_t * item =
3215 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3216 * item = * l;
3217 l->entry = team;
3218 l->next = item;
3219 }
3220
3221}
3222
3223static void
3224__kmp_print_structure_team(
3225 char const * title,
3226 kmp_team_p const * team
3227
3228) {
3229 __kmp_printf( "%s", title );
3230 if ( team != NULL ) {
3231 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3232 } else {
3233 __kmp_printf( " - (nil)\n" );
3234 }; // if
3235}
3236
3237static void
3238__kmp_print_structure_thread(
3239 char const * title,
3240 kmp_info_p const * thread
3241
3242) {
3243 __kmp_printf( "%s", title );
3244 if ( thread != NULL ) {
3245 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3246 } else {
3247 __kmp_printf( " - (nil)\n" );
3248 }; // if
3249}
3250
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003251void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003252__kmp_print_structure(
3253 void
3254) {
3255
3256 kmp_team_list_t list;
3257
3258 // Initialize list of teams.
3259 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3260 list->entry = NULL;
3261 list->next = NULL;
3262
3263 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3264 {
3265 int gtid;
3266 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3267 __kmp_printf( "%2d", gtid );
3268 if ( __kmp_threads != NULL ) {
3269 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3270 }; // if
3271 if ( __kmp_root != NULL ) {
3272 __kmp_printf( " %p", __kmp_root[ gtid ] );
3273 }; // if
3274 __kmp_printf( "\n" );
3275 }; // for gtid
3276 }
3277
3278 // Print out __kmp_threads array.
3279 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3280 if ( __kmp_threads != NULL ) {
3281 int gtid;
3282 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3283 kmp_info_t const * thread = __kmp_threads[ gtid ];
3284 if ( thread != NULL ) {
3285 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3286 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3287 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3288 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3289 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3290 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3291 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3292 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3293#if OMP_40_ENABLED
3294 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3295#endif
3296 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3297 __kmp_printf( "\n" );
3298 __kmp_print_structure_team_accum( list, thread->th.th_team );
3299 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3300 }; // if
3301 }; // for gtid
3302 } else {
3303 __kmp_printf( "Threads array is not allocated.\n" );
3304 }; // if
3305
3306 // Print out __kmp_root array.
3307 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3308 if ( __kmp_root != NULL ) {
3309 int gtid;
3310 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3311 kmp_root_t const * root = __kmp_root[ gtid ];
3312 if ( root != NULL ) {
3313 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3314 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3315 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3316 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3317 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3318 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3319 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3320 __kmp_printf( "\n" );
3321 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3322 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3323 }; // if
3324 }; // for gtid
3325 } else {
3326 __kmp_printf( "Ubers array is not allocated.\n" );
3327 }; // if
3328
3329 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3330 while ( list->next != NULL ) {
3331 kmp_team_p const * team = list->entry;
3332 int i;
3333 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3334 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3335 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3336 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3337 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3338 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3339 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3340 __kmp_printf( " Thread %2d: ", i );
3341 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3342 }; // for i
3343 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3344 __kmp_printf( "\n" );
3345 list = list->next;
3346 }; // while
3347
3348 // Print out __kmp_thread_pool and __kmp_team_pool.
3349 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3350 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3351 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3352 __kmp_printf( "\n" );
3353
3354 // Free team list.
3355 while ( list != NULL ) {
3356 kmp_team_list_item_t * item = list;
3357 list = list->next;
3358 KMP_INTERNAL_FREE( item );
3359 }; // while
3360
3361}
3362
3363#endif
3364
3365
3366//---------------------------------------------------------------------------
3367// Stuff for per-thread fast random number generator
3368// Table of primes
3369
3370static const unsigned __kmp_primes[] = {
3371 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3372 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3373 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3374 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3375 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3376 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3377 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3378 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3379 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3380 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3381 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3382 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3383 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3384 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3385 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3386 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3387};
3388
3389//---------------------------------------------------------------------------
3390// __kmp_get_random: Get a random number using a linear congruential method.
3391
3392unsigned short
3393__kmp_get_random( kmp_info_t * thread )
3394{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003395 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003396 unsigned short r = x>>16;
3397
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003398 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003399
3400 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3401 thread->th.th_info.ds.ds_tid, r) );
3402
3403 return r;
3404}
3405//--------------------------------------------------------
3406// __kmp_init_random: Initialize a random number generator
3407
3408void
3409__kmp_init_random( kmp_info_t * thread )
3410{
3411 unsigned seed = thread->th.th_info.ds.ds_tid;
3412
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003413 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3414 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3415 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003416}
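/* Illustrative stand-alone model of the generator above: a per-thread linear
 * congruential recurrence x_{n+1} = a * x_n + 1 (mod 2^32 via unsigned wraparound),
 * with `a` drawn from __kmp_primes by thread id (tid is a placeholder name here)
 * and the high 16 bits returned:
 *
 *   unsigned a = __kmp_primes[tid % (sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
 *   unsigned x = (tid + 1) * a + 1;                    // as in __kmp_init_random
 *   unsigned short r = (unsigned short)(x >> 16);      // as in __kmp_get_random
 *   x = x * a + 1;                                     // advance the state
 */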
3417
3418
3419#if KMP_OS_WINDOWS
3420/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3421static int
3422__kmp_reclaim_dead_roots(void) {
3423 int i, r = 0;
3424
3425 for(i = 0; i < __kmp_threads_capacity; ++i) {
3426 if( KMP_UBER_GTID( i ) &&
3427 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3428 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
3429 r += __kmp_unregister_root_other_thread(i);
3430 }
3431 }
3432 return r;
3433}
3434#endif
3435
3436/*
3437 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3438 free entries generated.
3439
3440 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3441 already dead.
3442
3443 On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate
3444 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3445 __kmp_tp_capacity, if threadprivate cache array has been created.
3446 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3447
3448 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3449 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3450 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3451 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3452 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3453 as many free slots as possible up to nWish.
3454
3455 If any argument is negative, the behavior is undefined.
3456*/
3457static int
3458__kmp_expand_threads(int nWish, int nNeed) {
3459 int added = 0;
3460 int old_tp_cached;
3461 int __kmp_actual_max_nth;
3462
3463 if(nNeed > nWish) /* normalize the arguments */
3464 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003465#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003466/* only for Windows static library */
3467 /* reclaim array entries for root threads that are already dead */
3468 added = __kmp_reclaim_dead_roots();
3469
3470 if(nNeed) {
3471 nNeed -= added;
3472 if(nNeed < 0)
3473 nNeed = 0;
3474 }
3475 if(nWish) {
3476 nWish -= added;
3477 if(nWish < 0)
3478 nWish = 0;
3479 }
3480#endif
3481 if(nWish <= 0)
3482 return added;
3483
3484 while(1) {
3485 int nTarget;
3486 int minimumRequiredCapacity;
3487 int newCapacity;
3488 kmp_info_t **newThreads;
3489 kmp_root_t **newRoot;
3490
3491 //
3492 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3493 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3494 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3495 // become > __kmp_max_nth in one of two ways:
3496 //
3497 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3498        // may not be reused by another thread, so we may need to increase
3499 // __kmp_threads_capacity to __kmp_max_threads + 1.
3500 //
3501 // 2) New foreign root(s) are encountered. We always register new
3502 // foreign roots. This may cause a smaller # of threads to be
3503 // allocated at subsequent parallel regions, but the worker threads
3504 // hang around (and eventually go to sleep) and need slots in the
3505 // __kmp_threads[] array.
3506 //
3507 // Anyway, that is the reason for moving the check to see if
3508        // __kmp_max_threads was exceeded into __kmp_reserve_threads()
3509 // instead of having it performed here. -BB
3510 //
3511 old_tp_cached = __kmp_tp_cached;
3512 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3513 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3514
3515 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3516 nTarget = nWish;
3517 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3518 /* can't fulfil nWish, so try nNeed */
3519 if(nNeed) {
3520 nTarget = nNeed;
3521 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3522 /* possible expansion too small -- give up */
3523 break;
3524 }
3525 } else {
3526 /* best-effort */
3527 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3528 if(!nTarget) {
3529                    /* can't expand at all -- give up */
3530 break;
3531 }
3532 }
3533 }
3534 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3535
3536 newCapacity = __kmp_threads_capacity;
3537 do{
3538 newCapacity =
3539 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3540 (newCapacity << 1) :
3541 __kmp_actual_max_nth;
3542 } while(newCapacity < minimumRequiredCapacity);
3543 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3544 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003545 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3546 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003547 memset(newThreads + __kmp_threads_capacity, 0,
3548 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3549 memset(newRoot + __kmp_threads_capacity, 0,
3550 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3551
3552 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3553 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3554 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3555 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3556 of a double-check pair.
3557 */
3558 __kmp_free(newThreads);
3559 continue; /* start over and try again */
3560 }
3561 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3562 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3563 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3564 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3565 __kmp_free(newThreads);
3566 continue; /* start over and try again */
3567 } else {
3568 /* success */
3569 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated.
3570 //
3571 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3572 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3573 added += newCapacity - __kmp_threads_capacity;
3574 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3575 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003576 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003577 }
3578 }
3579 return added;
3580}
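/* Illustrative arithmetic for the growth loop above: capacity doubles until the
 * requested minimum is reached, clipping at __kmp_actual_max_nth; e.g. growing a
 * capacity of 32 to satisfy a minimum of 70 tries 64, then 128 (or the clip value
 * if that is smaller).
 */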
3581
3582/* register the current thread as a root thread and obtain our gtid */
3583/* we must have the __kmp_initz_lock held at this point */
3584/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
3585int
3586__kmp_register_root( int initial_thread )
3587{
3588 kmp_info_t *root_thread;
3589 kmp_root_t *root;
3590 int gtid;
3591 int capacity;
3592 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3593 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3594 KMP_MB();
3595
3596
3597 /*
3598 2007-03-02:
3599
3600        If the initial thread has not invoked the OpenMP RTL yet, and this thread is not an initial one,
3601        the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
3602        return false (meaning there is at least one empty slot in the __kmp_threads array), but it
3603        is possible that the only free slot is #0, which is reserved for the initial thread and so cannot be
3604        used for this one. The following code works around this bug.
3605
3606        However, the right solution seems to be not reserving slot #0 for the initial thread, because:
3607        (1) there is no magic in slot #0,
3608        (2) we cannot detect the initial thread reliably (the first thread which does serial
3609            initialization may not be a real initial thread).
3610 */
3611 capacity = __kmp_threads_capacity;
3612 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3613 -- capacity;
3614 }; // if
3615
3616 /* see if there are too many threads */
3617 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3618 if ( __kmp_tp_cached ) {
3619 __kmp_msg(
3620 kmp_ms_fatal,
3621 KMP_MSG( CantRegisterNewThread ),
3622 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3623 KMP_HNT( PossibleSystemLimitOnThreads ),
3624 __kmp_msg_null
3625 );
3626 }
3627 else {
3628 __kmp_msg(
3629 kmp_ms_fatal,
3630 KMP_MSG( CantRegisterNewThread ),
3631 KMP_HNT( SystemLimitOnThreads ),
3632 __kmp_msg_null
3633 );
3634 }
3635 }; // if
3636
3637 /* find an available thread slot */
3638 /* Don't reassign the zero slot since we need that to only be used by initial
3639 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003640 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3641 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003642 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3643 KMP_ASSERT( gtid < __kmp_threads_capacity );
3644
3645 /* update global accounting */
3646 __kmp_all_nth ++;
3647 TCW_4(__kmp_nth, __kmp_nth + 1);
3648
3649 //
3650 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3651 // for low numbers of procs, and method #2 (keyed API call) for higher
3652 // numbers of procs.
3653 //
3654 if ( __kmp_adjust_gtid_mode ) {
3655 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3656 if ( TCR_4(__kmp_gtid_mode) != 2) {
3657 TCW_4(__kmp_gtid_mode, 2);
3658 }
3659 }
3660 else {
3661 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3662 TCW_4(__kmp_gtid_mode, 1);
3663 }
3664 }
3665 }
3666
3667#ifdef KMP_ADJUST_BLOCKTIME
3668 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003669 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003670 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3671 if ( __kmp_nth > __kmp_avail_proc ) {
3672 __kmp_zero_bt = TRUE;
3673 }
3674 }
3675#endif /* KMP_ADJUST_BLOCKTIME */
3676
3677 /* setup this new hierarchy */
3678 if( ! ( root = __kmp_root[gtid] )) {
3679 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3680 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3681 }
3682
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003683#if KMP_STATS_ENABLED
3684 // Initialize stats as soon as possible (right after gtid assignment).
3685 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3686 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3687 KMP_SET_THREAD_STATE(SERIAL_REGION);
3688 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3689#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003690 __kmp_initialize_root( root );
3691
3692 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003693 if( root->r.r_uber_thread ) {
3694 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003695 } else {
3696 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3697 if ( __kmp_storage_map ) {
3698 __kmp_print_thread_storage_map( root_thread, gtid );
3699 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003700 root_thread->th.th_info .ds.ds_gtid = gtid;
3701 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003702 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003703 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003704 }
3705 #if USE_FAST_MEMORY
3706 __kmp_initialize_fast_memory( root_thread );
3707 #endif /* USE_FAST_MEMORY */
3708
3709 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003710 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711 __kmp_initialize_bget( root_thread );
3712 #endif
3713 __kmp_init_random( root_thread ); // Initialize random number generator
3714 }
3715
3716 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003717 if( ! root_thread->th.th_serial_team ) {
3718 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003719 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003720
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003721 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003722#if OMPT_SUPPORT
3723 0, // root parallel id
3724#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003725#if OMP_40_ENABLED
3726 proc_bind_default,
3727#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003728 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003729 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003730 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003731 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003732 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003733 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003734
3735 /* drop root_thread into place */
3736 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3737
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003738 root->r.r_root_team->t.t_threads[0] = root_thread;
3739 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3740 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3741 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
3742 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003743
3744 /* initialize the thread, get it ready to go */
3745 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003746 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003747
3748 /* prepare the master thread for get_gtid() */
3749 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003750
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003751#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003752 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003753#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003754
Jim Cownie5e8470a2013-09-27 10:38:44 +00003755 #ifdef KMP_TDATA_GTID
3756 __kmp_gtid = gtid;
3757 #endif
3758 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3759 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003760
3761 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3762 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003763 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003764 KMP_INIT_BARRIER_STATE ) );
3765 { // Initialize barrier data.
3766 int b;
3767 for ( b = 0; b < bs_last_barrier; ++ b ) {
3768 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003769#if USE_DEBUGGER
3770 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3771#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003772 }; // for
3773 }
3774 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3775
Alp Toker763b9392014-02-28 09:42:41 +00003776#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003777# if OMP_40_ENABLED
3778 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3779 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3780 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3781 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3782# endif
3783
Jim Cownie5e8470a2013-09-27 10:38:44 +00003784 if ( TCR_4(__kmp_init_middle) ) {
3785 __kmp_affinity_set_init_mask( gtid, TRUE );
3786 }
Alp Toker763b9392014-02-28 09:42:41 +00003787#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003788
3789 __kmp_root_counter ++;
3790
3791 KMP_MB();
3792 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3793
3794 return gtid;
3795}
3796
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003797#if KMP_NESTED_HOT_TEAMS
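/* Frees the nested hot teams kept by 'thr' from 'level' downwards and returns how
   many worker entries that released.  Illustrative arithmetic (sketch only, derived
   from the code below): each hot team of hot_team_nth == nth contributes nth - 1
   freed workers (the master is never counted), and recursion into level + 1 only
   happens while level < max_level - 1.  E.g. with __kmp_hot_teams_max_level == 2
   the call from __kmp_reset_root is made with level == 1, so a level-1 hot team of
   4 threads adds 3 to the count and recurses no further. */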
3798static int
3799__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3800{
3801 int i, n, nth;
3802 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3803 if( !hot_teams || !hot_teams[level].hot_team ) {
3804 return 0;
3805 }
3806 KMP_DEBUG_ASSERT( level < max_level );
3807 kmp_team_t *team = hot_teams[level].hot_team;
3808 nth = hot_teams[level].hot_team_nth;
3809 n = nth - 1; // master is not freed
3810 if( level < max_level - 1 ) {
3811 for( i = 0; i < nth; ++i ) {
3812 kmp_info_t *th = team->t.t_threads[i];
3813 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3814 if( i > 0 && th->th.th_hot_teams ) {
3815 __kmp_free( th->th.th_hot_teams );
3816 th->th.th_hot_teams = NULL;
3817 }
3818 }
3819 }
3820 __kmp_free_team( root, team, NULL );
3821 return n;
3822}
3823#endif
3824
Jim Cownie5e8470a2013-09-27 10:38:44 +00003825/* Resets a root thread and clears its root and hot teams.
3826 Returns the number of __kmp_threads entries directly and indirectly freed.
3827*/
3828static int
3829__kmp_reset_root(int gtid, kmp_root_t *root)
3830{
3831 kmp_team_t * root_team = root->r.r_root_team;
3832 kmp_team_t * hot_team = root->r.r_hot_team;
3833 int n = hot_team->t.t_nproc;
3834 int i;
3835
3836 KMP_DEBUG_ASSERT( ! root->r.r_active );
3837
3838 root->r.r_root_team = NULL;
3839 root->r.r_hot_team = NULL;
3840 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call
3841 // to __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003842 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3843#if KMP_NESTED_HOT_TEAMS
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003844 if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003845 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3846 kmp_info_t *th = hot_team->t.t_threads[i];
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003847 if( __kmp_hot_teams_max_level > 1 ) {
3848 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3849 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003850 if( th->th.th_hot_teams ) {
3851 __kmp_free( th->th.th_hot_teams );
3852 th->th.th_hot_teams = NULL;
3853 }
3854 }
3855 }
3856#endif
3857 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003858
Jim Cownie5e8470a2013-09-27 10:38:44 +00003859 //
3860 // Before we can reap the thread, we need to make certain that all
                                          3861    // other threads in the teams that had this root as an ancestor have stopped trying to steal tasks.
3862 //
3863 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3864 __kmp_wait_to_unref_task_teams();
3865 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003866
3867 #if KMP_OS_WINDOWS
3868 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3869 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3870 (LPVOID)&(root->r.r_uber_thread->th),
3871 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3872 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3873 #endif /* KMP_OS_WINDOWS */
3874
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003875#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003876 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003877 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3878 int gtid = __kmp_get_gtid();
3879 __ompt_thread_end(ompt_thread_initial, gtid);
3880 }
3881#endif
3882
Jim Cownie5e8470a2013-09-27 10:38:44 +00003883 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3884 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3885
                                          3886    // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3887 root->r.r_uber_thread = NULL;
3888 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003889 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003890
3891 return n;
3892}
3893
3894void
3895__kmp_unregister_root_current_thread( int gtid )
3896{
Jim Cownie77c2a632014-09-03 11:34:33 +00003897 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003898 /* this lock should be ok, since unregister_root_current_thread is never called during
                                          3899     * an abort, only during a normal close. furthermore, if you have the
3900 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003901
3902 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3903 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3904 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3905 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3906 return;
3907 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003908 kmp_root_t *root = __kmp_root[gtid];
3909
Jim Cownie5e8470a2013-09-27 10:38:44 +00003910 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3911 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3912 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3913 KMP_ASSERT( root->r.r_active == FALSE );
3914
Jim Cownie5e8470a2013-09-27 10:38:44 +00003915
3916 KMP_MB();
3917
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003918#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003919 kmp_info_t * thread = __kmp_threads[gtid];
3920 kmp_team_t * team = thread->th.th_team;
3921 kmp_task_team_t * task_team = thread->th.th_task_team;
3922
3923 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003924 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3925#if OMPT_SUPPORT
3926 // the runtime is shutting down so we won't report any events
3927 thread->th.ompt_thread_info.state = ompt_state_undefined;
3928#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003929 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003930 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003931#endif
3932
Jim Cownie5e8470a2013-09-27 10:38:44 +00003933 __kmp_reset_root(gtid, root);
3934
3935 /* free up this thread slot */
3936 __kmp_gtid_set_specific( KMP_GTID_DNE );
3937#ifdef KMP_TDATA_GTID
3938 __kmp_gtid = KMP_GTID_DNE;
3939#endif
3940
3941 KMP_MB();
3942 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3943
3944 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3945}
3946
Jonathan Peyton2321d572015-06-08 19:25:25 +00003947#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003948/* __kmp_forkjoin_lock must already be held.
3949 Unregisters a root thread that is not the current thread. Returns the number of
3950 __kmp_threads entries freed as a result.
3951 */
3952static int
3953__kmp_unregister_root_other_thread( int gtid )
3954{
3955 kmp_root_t *root = __kmp_root[gtid];
3956 int r;
3957
3958 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3959 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3960 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3961 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3962 KMP_ASSERT( root->r.r_active == FALSE );
3963
3964 r = __kmp_reset_root(gtid, root);
3965 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3966 return r;
3967}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003968#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003969
Jim Cownie5e8470a2013-09-27 10:38:44 +00003970#if KMP_DEBUG
3971void __kmp_task_info() {
3972
3973 kmp_int32 gtid = __kmp_entry_gtid();
3974 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3975 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003976 kmp_team_t *steam = this_thr->th.th_serial_team;
3977 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003978
3979 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3980 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3981}
3982#endif // KMP_DEBUG
3983
Jim Cownie5e8470a2013-09-27 10:38:44 +00003984/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003985 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003986 * initialization of features as much as possible */
3987static void
3988__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3989{
3990 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3991 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003992 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003993 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003994 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003995 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003996 KMP_DEBUG_ASSERT( team->t.t_threads );
3997 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3998 KMP_DEBUG_ASSERT( master );
3999 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004000
4001 KMP_MB();
4002
4003 TCW_SYNC_PTR(this_thr->th.th_team, team);
4004
4005 this_thr->th.th_info.ds.ds_tid = tid;
4006 this_thr->th.th_set_nproc = 0;
4007#if OMP_40_ENABLED
4008 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00004009# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004010 this_thr->th.th_new_place = this_thr->th.th_current_place;
4011# endif
4012#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004013 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004014
4015 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004016 this_thr->th.th_team_nproc = team->t.t_nproc;
4017 this_thr->th.th_team_master = master;
4018 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004019 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4020
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004021 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004022
4023 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4024 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4025
4026 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4027
4028 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4029 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4030 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004031
4032 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004033 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004034
4035 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004036
4037#ifdef BUILD_TV
4038 this_thr->th.th_local.tv_data = 0;
4039#endif
4040
4041 if ( ! this_thr->th.th_pri_common ) {
4042 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4043 if ( __kmp_storage_map ) {
4044 __kmp_print_storage_map_gtid(
4045 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4046 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4047 );
4048 }; // if
4049 this_thr->th.th_pri_head = NULL;
4050 }; // if
4051
4052 /* Initialize dynamic dispatch */
4053 {
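        /* Note: each thread keeps __kmp_dispatch_num_buffers private dispatch buffers
           (cycled via th_disp_index) so that consecutive dynamically scheduled loops
           can use distinct buffers instead of overwriting one another's bookkeeping;
           a team whose t_max_nproc is 1 only ever needs a single buffer. */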
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004054 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004055 /*
4056 * Use team max_nproc since this will never change for the team.
4057 */
4058 size_t disp_size = sizeof( dispatch_private_info_t ) *
Jonathan Peyton067325f2016-05-31 19:01:15 +00004059 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004060 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4061 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004062 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004063 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4064
4065 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004066#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00004067 dispatch->th_doacross_buf_idx = 0;
4068#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004069 if( ! dispatch->th_disp_buffer ) {
4070 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004071
4072 if ( __kmp_storage_map ) {
4073 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
Jonathan Peyton067325f2016-05-31 19:01:15 +00004074 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
Jim Cownie5e8470a2013-09-27 10:38:44 +00004075 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4076 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4077 gtid, team->t.t_id, gtid );
4078 }
4079 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004080 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004081 }
4082
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004083 dispatch->th_dispatch_pr_current = 0;
4084 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004085
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004086 dispatch->th_deo_fcn = 0; /* ORDERED */
4087 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004088 }
4089
4090 this_thr->th.th_next_pool = NULL;
4091
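    /* th_task_state_memo_stack keeps one saved th_task_state value per nested
       parallel level (initially sized for 4 levels); nested hot-team reuse in
       __kmp_allocate_team reads the entry for its level back from the master's
       stack. */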
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004092 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004093 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004094 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4095 this_thr->th.th_task_state_top = 0;
4096 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004097 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4098 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004099 }
4100
Jim Cownie5e8470a2013-09-27 10:38:44 +00004101 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4102 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4103
4104 KMP_MB();
4105}
4106
4107
                                          4108/* Allocate a new thread for the requesting team. This is only called from within a
                                          4109 * forkjoin critical section. We first try to get an available thread from the
                                          4110 * thread pool; if none is available, we fork a new one, assuming we are able to
                                          4111 * create one. This should be assured, as the caller should have checked on this
                                          4112 * first.
4113 */
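/* A rough sketch of the two paths below (illustration only, not compiled):
 *
 *     if ( __kmp_thread_pool ) {                 // 1) reuse a parked worker:
 *         new_thr = pool head;                   //    pop it and rebind it to 'team'
 *         __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
 *         return new_thr;
 *     }
 *     // 2) otherwise fork a brand new worker: pick the smallest unused gtid,
 *     //    allocate the kmp_info_t and its reserve serial team, initialize
 *     //    barrier/affinity state, then
 *     __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
 *     return new_thr;
 */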
4114kmp_info_t *
4115__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4116{
4117 kmp_team_t *serial_team;
4118 kmp_info_t *new_thr;
4119 int new_gtid;
4120
4121 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4122 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004123#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004124 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004125#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004126 KMP_MB();
4127
4128 /* first, try to get one from the thread pool */
4129 if ( __kmp_thread_pool ) {
4130
4131 new_thr = (kmp_info_t*)__kmp_thread_pool;
4132 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4133 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4134 __kmp_thread_pool_insert_pt = NULL;
4135 }
4136 TCW_4(new_thr->th.th_in_pool, FALSE);
4137 //
4138 // Don't touch th_active_in_pool or th_active.
4139 // The worker thread adjusts those flags as it sleeps/awakens.
4140 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004141 __kmp_thread_pool_nth--;
4142
4143 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4144 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004145 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004146 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4147 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4148
4149 /* setup the thread structure */
4150 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4151 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4152
4153 TCW_4(__kmp_nth, __kmp_nth + 1);
4154
Jonathan Peyton54127982015-11-04 21:37:48 +00004155 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004156 new_thr->th.th_task_state_top = 0;
4157 new_thr->th.th_task_state_stack_sz = 4;
4158
Jim Cownie5e8470a2013-09-27 10:38:44 +00004159#ifdef KMP_ADJUST_BLOCKTIME
                                          4160        /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004161 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004162 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4163 if ( __kmp_nth > __kmp_avail_proc ) {
4164 __kmp_zero_bt = TRUE;
4165 }
4166 }
4167#endif /* KMP_ADJUST_BLOCKTIME */
4168
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004169#if KMP_DEBUG
4170 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
4171 int b;
4172 kmp_balign_t * balign = new_thr->th.th_bar;
4173 for( b = 0; b < bs_last_barrier; ++ b )
4174 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4175#endif
4176
Jim Cownie5e8470a2013-09-27 10:38:44 +00004177 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4178 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4179
4180 KMP_MB();
4181 return new_thr;
4182 }
4183
4184
                                          4185    /* no, we'll fork a new one */
4186 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4187 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4188
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004189#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00004190 //
4191 // If this is the first worker thread the RTL is creating, then also
4192 // launch the monitor thread. We try to do this as early as possible.
4193 //
4194 if ( ! TCR_4( __kmp_init_monitor ) ) {
4195 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4196 if ( ! TCR_4( __kmp_init_monitor ) ) {
4197 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4198 TCW_4( __kmp_init_monitor, 1 );
4199 __kmp_create_monitor( & __kmp_monitor );
4200 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004201 #if KMP_OS_WINDOWS
4202 // AC: wait until monitor has started. This is a fix for CQ232808.
4203 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
                                          4204            // work in between, then there is a high probability that the monitor thread starts after
                                          4205            // the library shutdown. At shutdown it is too late to cope with the problem, because
                                          4206            // when the master is in DllMain (process detach) the monitor has no chance to start
                                          4207            // (it is blocked), and the master has no means to inform the monitor that the library has gone,
4208 // because all the memory which the monitor can access is going to be released/reset.
4209 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4210 KMP_YIELD( TRUE );
4211 }
4212 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4213 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004214 }
4215 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4216 }
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004217#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004218
4219 KMP_MB();
4220 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4221 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4222 }
4223
4224 /* allocate space for it. */
4225 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4226
4227 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4228
4229 if ( __kmp_storage_map ) {
4230 __kmp_print_thread_storage_map( new_thr, new_gtid );
4231 }
4232
4233 /* add the reserve serialized team, initialized from the team's master thread */
4234 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004235 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004236 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004237
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004238 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004239 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004240#if OMPT_SUPPORT
4241 0, // root parallel id
4242#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004243#if OMP_40_ENABLED
4244 proc_bind_default,
4245#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004246 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004247 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004248 }
4249 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004250 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
4251 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004252 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4253 new_thr ) );
4254
4255 /* setup the thread structures */
4256 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4257
4258 #if USE_FAST_MEMORY
4259 __kmp_initialize_fast_memory( new_thr );
4260 #endif /* USE_FAST_MEMORY */
4261
4262 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004263 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004264 __kmp_initialize_bget( new_thr );
4265 #endif
4266
4267 __kmp_init_random( new_thr ); // Initialize random number generator
4268
4269 /* Initialize these only once when thread is grabbed for a team allocation */
4270 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4271 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4272
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004273 int b;
4274 kmp_balign_t * balign = new_thr->th.th_bar;
4275 for(b=0; b<bs_last_barrier; ++b) {
4276 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4277 balign[b].bb.team = NULL;
4278 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4279 balign[b].bb.use_oncore_barrier = 0;
4280 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004281
4282 new_thr->th.th_spin_here = FALSE;
4283 new_thr->th.th_next_waiting = 0;
4284
Alp Toker98758b02014-03-02 04:12:06 +00004285#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004286 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4287 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4288 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4289 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4290#endif
4291
4292 TCW_4(new_thr->th.th_in_pool, FALSE);
4293 new_thr->th.th_active_in_pool = FALSE;
4294 TCW_4(new_thr->th.th_active, TRUE);
4295
4296 /* adjust the global counters */
4297 __kmp_all_nth ++;
4298 __kmp_nth ++;
4299
4300 //
4301 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4302 // for low numbers of procs, and method #2 (keyed API call) for higher
4303 // numbers of procs.
4304 //
4305 if ( __kmp_adjust_gtid_mode ) {
4306 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4307 if ( TCR_4(__kmp_gtid_mode) != 2) {
4308 TCW_4(__kmp_gtid_mode, 2);
4309 }
4310 }
4311 else {
4312 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4313 TCW_4(__kmp_gtid_mode, 1);
4314 }
4315 }
4316 }
4317
4318#ifdef KMP_ADJUST_BLOCKTIME
4319 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004320 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004321 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4322 if ( __kmp_nth > __kmp_avail_proc ) {
4323 __kmp_zero_bt = TRUE;
4324 }
4325 }
4326#endif /* KMP_ADJUST_BLOCKTIME */
4327
4328 /* actually fork it and create the new worker thread */
4329 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4330 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4331 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4332
Jim Cownie5e8470a2013-09-27 10:38:44 +00004333 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4334 KMP_MB();
4335 return new_thr;
4336}
4337
4338/*
4339 * reinitialize team for reuse.
4340 *
                                          4341 * The hot team code calls this routine at every fork barrier, so the EPCC barrier
                                          4342 * tests are extremely sensitive to changes in it, esp. writes to the team
4343 * struct, which cause a cache invalidation in all threads.
4344 *
4345 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4346 */
4347static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004348__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004349 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4350 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004351 KMP_DEBUG_ASSERT( team && new_icvs);
4352 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004353 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004354
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004355 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004356
Jim Cownie181b4bb2013-12-23 17:28:57 +00004357 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004358 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004359 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004360
4361 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4362 team->t.t_threads[0], team ) );
4363}
4364
Jim Cownie5e8470a2013-09-27 10:38:44 +00004365
4366/* initialize the team data structure
4367 * this assumes the t_threads and t_max_nproc are already set
4368 * also, we don't touch the arguments */
4369static void
4370__kmp_initialize_team(
4371 kmp_team_t * team,
4372 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004373 kmp_internal_control_t * new_icvs,
4374 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004375) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004376 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4377
Jim Cownie5e8470a2013-09-27 10:38:44 +00004378 /* verify */
4379 KMP_DEBUG_ASSERT( team );
4380 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4381 KMP_DEBUG_ASSERT( team->t.t_threads );
4382 KMP_MB();
4383
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004384 team->t.t_master_tid = 0; /* not needed */
4385 /* team->t.t_master_bar; not needed */
4386 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4387 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004388
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004389 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4390 team->t.t_next_pool = NULL;
4391 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004392
4393 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004394 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004395
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004396 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4397 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004398
4399#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004400 team->t.t_fp_control_saved = FALSE; /* not needed */
4401 team->t.t_x87_fpu_control_word = 0; /* not needed */
4402 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004403#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4404
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004405 team->t.t_construct = 0;
4406 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004407
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004408 team->t.t_ordered .dt.t_value = 0;
4409 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004410
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004411 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004412
4413#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004414 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004415#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004416 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004417
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004418 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004419
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004420 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004421
4422 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004423 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004424}
4425
Alp Toker98758b02014-03-02 04:12:06 +00004426#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004427/* Sets full mask for thread and returns old mask, no changes to structures. */
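/* The caller is expected to restore *old_mask itself once its workers have been
   created; __kmp_allocate_team below does so with __kmp_set_system_affinity(
   old_mask, TRUE ) followed by KMP_CPU_FREE( old_mask ). */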
4428static void
4429__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4430{
4431 if ( KMP_AFFINITY_CAPABLE() ) {
4432 int status;
4433 if ( old_mask != NULL ) {
4434 status = __kmp_get_system_affinity( old_mask, TRUE );
4435 int error = errno;
4436 if ( status != 0 ) {
4437 __kmp_msg(
4438 kmp_ms_fatal,
4439 KMP_MSG( ChangeThreadAffMaskError ),
4440 KMP_ERR( error ),
4441 __kmp_msg_null
4442 );
4443 }
4444 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004445 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004446 }
4447}
4448#endif
4449
Alp Toker98758b02014-03-02 04:12:06 +00004450#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004451
4452//
4453// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
                                          4454// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004455// thread's partition, and binds each worker to a place in its partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004456// The master thread's partition should already include its current binding.
4457//
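// Worked examples of the arithmetic used below (illustration only):
//  * proc_bind_close with more threads than places: n_th = 10 over n_places = 4
//    gives S = n_th / n_places = 2, rem = 2 and gap = n_places / rem = 2, so the
//    places, starting at the master's place, receive 3, 2, 3, 2 threads - every
//    place gets S threads and 'rem' of them, spaced 'gap' apart, get one extra.
//  * proc_bind_spread with fewer threads than places: 3 threads over a parent
//    partition of 8 places gives S = n_places / n_th = 2, rem = 2, gap = 1, so the
//    threads receive disjoint sub-partitions of 3, 3 and 2 places and each thread
//    is bound to the first place of its own sub-partition.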
4458static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004459__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004460{
4461 //
4462 // Copy the master thread's place partion to the team struct
4463 //
4464 kmp_info_t *master_th = team->t.t_threads[0];
4465 KMP_DEBUG_ASSERT( master_th != NULL );
4466 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4467 int first_place = master_th->th.th_first_place;
4468 int last_place = master_th->th.th_last_place;
4469 int masters_place = master_th->th.th_current_place;
4470 team->t.t_first_place = first_place;
4471 team->t.t_last_place = last_place;
4472
4473 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4474 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4475 masters_place, first_place, last_place ) );
4476
4477 switch ( proc_bind ) {
4478
4479 case proc_bind_default:
4480 //
4481 // serial teams might have the proc_bind policy set to
4482 // proc_bind_default. It doesn't matter, as we don't
4483 // rebind the master thread for any proc_bind policy.
4484 //
4485 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4486 break;
4487
4488 case proc_bind_master:
4489 {
4490 int f;
4491 int n_th = team->t.t_nproc;
4492 for ( f = 1; f < n_th; f++ ) {
4493 kmp_info_t *th = team->t.t_threads[f];
4494 KMP_DEBUG_ASSERT( th != NULL );
4495 th->th.th_first_place = first_place;
4496 th->th.th_last_place = last_place;
4497 th->th.th_new_place = masters_place;
4498
4499 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4500 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4501 team->t.t_id, f, masters_place, first_place, last_place ) );
4502 }
4503 }
4504 break;
4505
4506 case proc_bind_close:
4507 {
4508 int f;
4509 int n_th = team->t.t_nproc;
4510 int n_places;
4511 if ( first_place <= last_place ) {
4512 n_places = last_place - first_place + 1;
4513 }
4514 else {
4515 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4516 }
4517 if ( n_th <= n_places ) {
4518 int place = masters_place;
4519 for ( f = 1; f < n_th; f++ ) {
4520 kmp_info_t *th = team->t.t_threads[f];
4521 KMP_DEBUG_ASSERT( th != NULL );
4522
4523 if ( place == last_place ) {
4524 place = first_place;
4525 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004526 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004527 place = 0;
4528 }
4529 else {
4530 place++;
4531 }
4532 th->th.th_first_place = first_place;
4533 th->th.th_last_place = last_place;
4534 th->th.th_new_place = place;
4535
4536 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4537 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4538 team->t.t_id, f, place, first_place, last_place ) );
4539 }
4540 }
4541 else {
4542 int S, rem, gap, s_count;
4543 S = n_th / n_places;
4544 s_count = 0;
4545 rem = n_th - ( S * n_places );
4546 gap = rem > 0 ? n_places/rem : n_places;
4547 int place = masters_place;
4548 int gap_ct = gap;
4549 for ( f = 0; f < n_th; f++ ) {
4550 kmp_info_t *th = team->t.t_threads[f];
4551 KMP_DEBUG_ASSERT( th != NULL );
4552
4553 th->th.th_first_place = first_place;
4554 th->th.th_last_place = last_place;
4555 th->th.th_new_place = place;
4556 s_count++;
4557
4558 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4559 // do nothing, add an extra thread to place on next iteration
4560 }
4561 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4562 // we added an extra thread to this place; move to next place
4563 if ( place == last_place ) {
4564 place = first_place;
4565 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004566 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004567 place = 0;
4568 }
4569 else {
4570 place++;
4571 }
4572 s_count = 0;
4573 gap_ct = 1;
4574 rem--;
4575 }
4576 else if (s_count == S) { // place full; don't add extra
4577 if ( place == last_place ) {
4578 place = first_place;
4579 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004580 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004581 place = 0;
4582 }
4583 else {
4584 place++;
4585 }
4586 gap_ct++;
4587 s_count = 0;
4588 }
4589
4590 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4591 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4592 team->t.t_id, f, th->th.th_new_place, first_place,
4593 last_place ) );
4594 }
4595 KMP_DEBUG_ASSERT( place == masters_place );
4596 }
4597 }
4598 break;
4599
4600 case proc_bind_spread:
4601 {
4602 int f;
4603 int n_th = team->t.t_nproc;
4604 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004605 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004606 if ( first_place <= last_place ) {
4607 n_places = last_place - first_place + 1;
4608 }
4609 else {
4610 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4611 }
4612 if ( n_th <= n_places ) {
4613 int place = masters_place;
4614 int S = n_places/n_th;
4615 int s_count, rem, gap, gap_ct;
4616 rem = n_places - n_th*S;
4617 gap = rem ? n_th/rem : 1;
4618 gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004619 thidx = n_th;
4620 if (update_master_only == 1)
4621 thidx = 1;
4622 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004623 kmp_info_t *th = team->t.t_threads[f];
4624 KMP_DEBUG_ASSERT( th != NULL );
4625
4626 th->th.th_first_place = place;
4627 th->th.th_new_place = place;
4628 s_count = 1;
4629 while (s_count < S) {
4630 if ( place == last_place ) {
4631 place = first_place;
4632 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004633 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004634 place = 0;
4635 }
4636 else {
4637 place++;
4638 }
4639 s_count++;
4640 }
4641 if (rem && (gap_ct == gap)) {
4642 if ( place == last_place ) {
4643 place = first_place;
4644 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004645 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004646 place = 0;
4647 }
4648 else {
4649 place++;
4650 }
4651 rem--;
4652 gap_ct = 0;
4653 }
4654 th->th.th_last_place = place;
4655 gap_ct++;
4656
4657 if ( place == last_place ) {
4658 place = first_place;
4659 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004660 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004661 place = 0;
4662 }
4663 else {
4664 place++;
4665 }
4666
4667 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4668 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4669 team->t.t_id, f, th->th.th_new_place,
4670 th->th.th_first_place, th->th.th_last_place ) );
4671 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004672 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004673 }
4674 else {
4675 int S, rem, gap, s_count;
4676 S = n_th / n_places;
4677 s_count = 0;
4678 rem = n_th - ( S * n_places );
4679 gap = rem > 0 ? n_places/rem : n_places;
4680 int place = masters_place;
4681 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004682 thidx = n_th;
4683 if (update_master_only == 1)
4684 thidx = 1;
4685 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004686 kmp_info_t *th = team->t.t_threads[f];
4687 KMP_DEBUG_ASSERT( th != NULL );
4688
4689 th->th.th_first_place = place;
4690 th->th.th_last_place = place;
4691 th->th.th_new_place = place;
4692 s_count++;
4693
4694 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4695 // do nothing, add an extra thread to place on next iteration
4696 }
4697 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4698 // we added an extra thread to this place; move on to next place
4699 if ( place == last_place ) {
4700 place = first_place;
4701 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004702 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004703 place = 0;
4704 }
4705 else {
4706 place++;
4707 }
4708 s_count = 0;
4709 gap_ct = 1;
4710 rem--;
4711 }
4712 else if (s_count == S) { // place is full; don't add extra thread
4713 if ( place == last_place ) {
4714 place = first_place;
4715 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004716 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004717 place = 0;
4718 }
4719 else {
4720 place++;
4721 }
4722 gap_ct++;
4723 s_count = 0;
4724 }
4725
4726 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4727 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4728 team->t.t_id, f, th->th.th_new_place,
4729 th->th.th_first_place, th->th.th_last_place) );
4730 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004731 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004732 }
4733 }
4734 break;
4735
4736 default:
4737 break;
4738 }
4739
4740 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4741}
4742
Alp Toker98758b02014-03-02 04:12:06 +00004743#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004744
4745/* allocate a new team data structure to use. take one off of the free pool if available */
4746kmp_team_t *
4747__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004748#if OMPT_SUPPORT
4749 ompt_parallel_id_t ompt_parallel_id,
4750#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004751#if OMP_40_ENABLED
4752 kmp_proc_bind_t new_proc_bind,
4753#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004754 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004755 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004756{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00004757 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004758 int f;
4759 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004760 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004761 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004762
4763 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4764 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4765 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4766 KMP_MB();
4767
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004768#if KMP_NESTED_HOT_TEAMS
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004769 kmp_hot_team_ptr_t *hot_teams;
4770 if( master ) {
4771 team = master->th.th_team;
4772 level = team->t.t_active_level;
4773 if( master->th.th_teams_microtask ) { // in teams construct?
4774 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4775 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4776 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
4777 ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise
4778 }
4779 }
4780 hot_teams = master->th.th_hot_teams;
4781 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4782 { // hot team has already been allocated for given level
4783 use_hot_team = 1;
4784 } else {
4785 use_hot_team = 0;
4786 }
4787 }
4788#endif
4789 // Optimization to use a "hot" team
4790 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004791 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004792#if KMP_NESTED_HOT_TEAMS
4793 team = hot_teams[level].hot_team;
4794#else
4795 team = root->r.r_hot_team;
4796#endif
4797#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004798 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004799 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4800 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004801 }
4802#endif
4803
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004804 // Has the number of threads changed?
4805 /* Let's assume the most common case is that the number of threads is unchanged, and
4806 put that case first. */
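        // Three cases follow: the size is unchanged (just refresh the schedule/ICVs
        // and, if the binding changed, the place partition), the team shrinks (free
        // or park the surplus workers), or the team grows (pull threads from the
        // hot-team reserve if any, else allocate new workers, then reinitialize
        // every thread).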
4807 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4808 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004809 // This case can mean that omp_set_num_threads() was called and the hot team size
4810 // was already reduced, so we check the special flag
4811 if ( team->t.t_size_changed == -1 ) {
4812 team->t.t_size_changed = 1;
4813 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004814 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004815 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004816
4817 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004818 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004819 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4820 team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004821 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004822
4823 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4824
4825 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4826 0, team->t.t_threads[0], team ) );
4827 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4828
4829#if OMP_40_ENABLED
4830# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004831 if ( ( team->t.t_size_changed == 0 )
4832 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004833 if (new_proc_bind == proc_bind_spread) {
4834 __kmp_partition_places(team, 1); // add flag to update only master for spread
4835 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004836 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4837 team->t.t_id, new_proc_bind, team->t.t_first_place,
4838 team->t.t_last_place ) );
4839 }
4840 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004841 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004842 __kmp_partition_places( team );
4843 }
4844# else
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004845 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004846# endif /* KMP_AFFINITY_SUPPORTED */
4847#endif /* OMP_40_ENABLED */
4848 }
4849 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004850 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4851
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004852 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004853#if KMP_NESTED_HOT_TEAMS
4854 if( __kmp_hot_teams_mode == 0 ) {
4855 // AC: saved number of threads should correspond to team's value in this mode,
4856 // can be bigger in mode 1, when hot team has some threads in reserve
4857 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4858 hot_teams[level].hot_team_nth = new_nproc;
4859#endif // KMP_NESTED_HOT_TEAMS
4860 /* release the extra threads we don't need any more */
4861 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4862 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004863 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4864 // When decreasing team size, threads no longer in the team should unref task team.
4865 team->t.t_threads[f]->th.th_task_team = NULL;
4866 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004867 __kmp_free_thread( team->t.t_threads[ f ] );
4868 team->t.t_threads[ f ] = NULL;
4869 }
4870#if KMP_NESTED_HOT_TEAMS
4871 } // (__kmp_hot_teams_mode == 0)
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004872 else {
4873 // When keeping extra threads in team, switch threads to wait on own b_go flag
4874 for (f=new_nproc; f<team->t.t_nproc; ++f) {
4875 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4876 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4877 for (int b=0; b<bs_last_barrier; ++b) {
4878 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4879 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4880 }
4881 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4882 }
4883 }
4884 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004885#endif // KMP_NESTED_HOT_TEAMS
4886 team->t.t_nproc = new_nproc;
4887 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004888 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4889 team->t.t_sched.chunk != new_icvs->sched.chunk)
4890 team->t.t_sched = new_icvs->sched;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004891 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004892
Jim Cownie5e8470a2013-09-27 10:38:44 +00004893 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004894 for(f = 0; f < new_nproc; ++f) {
4895 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004896 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004897 // restore the current task state of the master thread: should be the implicit task
4898 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4899 0, team->t.t_threads[0], team ) );
4900
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004901 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004902
4903#ifdef KMP_DEBUG
4904 for ( f = 0; f < team->t.t_nproc; f++ ) {
4905 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4906 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4907 }
4908#endif
4909
4910#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004911 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00004912# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004913 __kmp_partition_places( team );
4914# endif
4915#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004916 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004917 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004918#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004919 kmp_affin_mask_t *old_mask;
4920 if ( KMP_AFFINITY_CAPABLE() ) {
4921 KMP_CPU_ALLOC(old_mask);
4922 }
4923#endif
4924
4925 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4926
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004927 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004928
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004929#if KMP_NESTED_HOT_TEAMS
4930 int avail_threads = hot_teams[level].hot_team_nth;
4931 if( new_nproc < avail_threads )
4932 avail_threads = new_nproc;
4933 kmp_info_t **other_threads = team->t.t_threads;
4934 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4935 // Adjust barrier data of reserved threads (if any) of the team
4936 // Other data will be set in __kmp_initialize_info() below.
4937 int b;
4938 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4939 for ( b = 0; b < bs_last_barrier; ++ b ) {
4940 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4941 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004942#if USE_DEBUGGER
4943 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4944#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004945 }
4946 }
4947 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4948 // we have all needed threads in reserve, no need to allocate any
                                          4949            // this is only possible in mode 1; we cannot have reserved threads in mode 0
4950 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4951 team->t.t_nproc = new_nproc; // just get reserved threads involved
4952 } else {
4953 // we may have some threads in reserve, but not enough
4954 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4955 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4956#endif // KMP_NESTED_HOT_TEAMS
4957 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004958 /* reallocate larger arrays */
4959 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004960 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004961 }
4962
Alp Toker98758b02014-03-02 04:12:06 +00004963#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004964 /* Temporarily set full mask for master thread before
4965 creation of workers. The reason is that workers inherit
                                          4966               their affinity from the master, so if a lot of workers are
                                          4967               created on a single core quickly, they don't get
4968 a chance to set their own affinity for a long time.
4969 */
4970 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4971#endif
4972
4973 /* allocate new threads for the hot team */
4974 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4975 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4976 KMP_DEBUG_ASSERT( new_worker );
4977 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004978
Jonathan Peytond26e2132015-09-10 18:44:30 +00004979 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004980 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4981 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4982 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4983
4984 { // Initialize barrier data for new threads.
4985 int b;
4986 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004987 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004988 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004989 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004990#if USE_DEBUGGER
4991 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4992#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004993 }
4994 }
4995 }
4996
Alp Toker98758b02014-03-02 04:12:06 +00004997#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004998 if ( KMP_AFFINITY_CAPABLE() ) {
4999 /* Restore initial master thread's affinity mask */
5000 __kmp_set_system_affinity( old_mask, TRUE );
5001 KMP_CPU_FREE(old_mask);
5002 }
5003#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005004#if KMP_NESTED_HOT_TEAMS
5005 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5006#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005007        /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00005008 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005009 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005010
Jonathan Peytone03b62f2015-10-08 18:49:40 +00005011 /* reinitialize the threads */
5012 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00005013 for (f=0; f < team->t.t_nproc; ++f)
5014 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5015 if (level) { // set th_task_state for new threads in nested hot team
5016 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00005017 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00005018 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
5019 for (f=old_nproc; f < team->t.t_nproc; ++f)
5020 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005021 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005022 else { // set th_task_state for new threads in non-nested hot team
5023 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
5024 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005025 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005026 }
5027
Jim Cownie5e8470a2013-09-27 10:38:44 +00005028#ifdef KMP_DEBUG
5029 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5030 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5031 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5032 }
5033#endif
5034
5035#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00005036 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00005037# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005038 __kmp_partition_places( team );
5039# endif
5040#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005041 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005042
5043#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005044 kmp_info_t *master = team->t.t_threads[0];
5045 if( master->th.th_teams_microtask ) {
5046 for( f = 1; f < new_nproc; ++f ) {
5047 // propagate teams construct specific info to workers
5048 kmp_info_t *thr = team->t.t_threads[f];
5049 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5050 thr->th.th_teams_level = master->th.th_teams_level;
5051 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005052 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005053 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005054#endif /* OMP_40_ENABLED */
5055#if KMP_NESTED_HOT_TEAMS
5056 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005057 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005058 for( f = 1; f < new_nproc; ++f ) {
5059 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005060 int b;
5061 kmp_balign_t * balign = thr->th.th_bar;
5062 for( b = 0; b < bs_last_barrier; ++ b ) {
5063 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5064 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005065#if USE_DEBUGGER
5066 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5067#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005068 }
5069 }
5070 }
5071#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005072
5073 /* reallocate space for arguments if necessary */
5074 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005075 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005076 //
5077 // The hot team re-uses the previous task team,
5078 // if untouched during the previous release->gather phase.
5079 //
5080
5081 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5082
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005083#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005084 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005085 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5086 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005087 }
5088#endif
5089
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005090#if OMPT_SUPPORT
5091 __ompt_team_assign_id(team, ompt_parallel_id);
5092#endif
5093
Jim Cownie5e8470a2013-09-27 10:38:44 +00005094 KMP_MB();
5095
5096 return team;
5097 }
5098
5099 /* next, let's try to take one from the team pool */
5100 KMP_MB();
5101 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5102 {
5103 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5104 if ( team->t.t_max_nproc >= max_nproc ) {
5105 /* take this team from the team pool */
5106 __kmp_team_pool = team->t.t_next_pool;
5107
5108 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005109 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005110
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005111 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5112 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5113 team->t.t_task_team[0] = NULL;
5114 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005115
5116 /* reallocate space for arguments if necessary */
5117 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005118 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005119
5120 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5121 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5122 { // Initialize barrier data.
5123 int b;
5124 for ( b = 0; b < bs_last_barrier; ++ b) {
5125 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005126#if USE_DEBUGGER
5127 team->t.t_bar[ b ].b_master_arrived = 0;
5128 team->t.t_bar[ b ].b_team_arrived = 0;
5129#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005130 }
5131 }
5132
5133#if OMP_40_ENABLED
5134 team->t.t_proc_bind = new_proc_bind;
5135#endif
5136
5137 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005138
5139#if OMPT_SUPPORT
5140 __ompt_team_assign_id(team, ompt_parallel_id);
5141#endif
5142
Jim Cownie5e8470a2013-09-27 10:38:44 +00005143 KMP_MB();
5144
5145 return team;
5146 }
5147
5148 /* reap team if it is too small, then loop back and check the next one */
 5149        /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
5150 /* TODO: Use technique to find the right size hot-team, don't reap them */
5151 team = __kmp_reap_team( team );
5152 __kmp_team_pool = team;
5153 }
5154
5155 /* nothing available in the pool, no matter, make a new team! */
5156 KMP_MB();
5157 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5158
5159 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005160 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005161 /* NOTE well, for some reason allocating one big buffer and dividing it
5162 * up seems to really hurt performance a lot on the P4, so, let's not use
5163 * this... */
5164 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005165
5166 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005167 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005168
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005169 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5170 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5171 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5172 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005173
5174 if ( __kmp_storage_map ) {
5175 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5176 }
5177
5178 /* allocate space for arguments */
5179 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005180 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005181
5182 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5183 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5184 { // Initialize barrier data.
5185 int b;
5186 for ( b = 0; b < bs_last_barrier; ++ b ) {
5187 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005188#if USE_DEBUGGER
5189 team->t.t_bar[ b ].b_master_arrived = 0;
5190 team->t.t_bar[ b ].b_team_arrived = 0;
5191#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005192 }
5193 }
5194
5195#if OMP_40_ENABLED
5196 team->t.t_proc_bind = new_proc_bind;
5197#endif
5198
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005199#if OMPT_SUPPORT
5200 __ompt_team_assign_id(team, ompt_parallel_id);
5201 team->t.ompt_serialized_team_info = NULL;
5202#endif
5203
Jim Cownie5e8470a2013-09-27 10:38:44 +00005204 KMP_MB();
5205
5206 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5207
5208 return team;
5209}
5210
5211/* TODO implement hot-teams at all levels */
5212/* TODO implement lazy thread release on demand (disband request) */
5213
5214/* free the team. return it to the team pool. release all the threads
5215 * associated with it */
5216void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005217__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005218{
5219 int f;
5220 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5221
5222 /* verify state */
5223 KMP_DEBUG_ASSERT( root );
5224 KMP_DEBUG_ASSERT( team );
5225 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5226 KMP_DEBUG_ASSERT( team->t.t_threads );
5227
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005228 int use_hot_team = team == root->r.r_hot_team;
5229#if KMP_NESTED_HOT_TEAMS
5230 int level;
5231 kmp_hot_team_ptr_t *hot_teams;
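    // Work out which nesting level this team corresponds to in the master's hot team array.
    // The teams construct does not bump t_active_level for the team of masters (and, before
    // the parallel, for the team of workers), so the level is adjusted below to compensate.
    // If the level falls within __kmp_hot_teams_max_level, the team is treated as a hot team
    // and is kept alive instead of being released.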
5232 if( master ) {
5233 level = team->t.t_active_level - 1;
5234 if( master->th.th_teams_microtask ) { // in teams construct?
5235 if( master->th.th_teams_size.nteams > 1 ) {
5236 ++level; // level was not increased in teams construct for team_of_masters
5237 }
5238 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5239 master->th.th_teams_level == team->t.t_level ) {
5240 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5241 } // team->t.t_level will be increased inside parallel
5242 }
5243 hot_teams = master->th.th_hot_teams;
5244 if( level < __kmp_hot_teams_max_level ) {
5245 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5246 use_hot_team = 1;
5247 }
5248 }
5249#endif // KMP_NESTED_HOT_TEAMS
5250
Jim Cownie5e8470a2013-09-27 10:38:44 +00005251 /* team is done working */
5252 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005253 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005254 // Do not reset pointer to parent team to NULL for hot teams.
5255
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005256 /* if we are non-hot team, release our threads */
5257 if( ! use_hot_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005258 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005259 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005260 int tt_idx;
5261 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005262 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5263 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005264 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5265 team->t.t_threads[f]->th.th_task_team = NULL;
5266 }
5267 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005268#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005269 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005270#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005271 team->t.t_task_team[tt_idx] = NULL;
5272 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005273 }
5274 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005275
5276 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005277 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005278 team->t.t_level = 0;
5279 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005280
Jim Cownie5e8470a2013-09-27 10:38:44 +00005281 /* free the worker threads */
5282 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5283 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5284 __kmp_free_thread( team->t.t_threads[ f ] );
5285 team->t.t_threads[ f ] = NULL;
5286 }
5287
Jim Cownie5e8470a2013-09-27 10:38:44 +00005288 /* put the team back in the team pool */
5289 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005290 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005291 __kmp_team_pool = (volatile kmp_team_t*) team;
5292 }
5293
5294 KMP_MB();
5295}
5296
5297
5298/* reap the team. destroy it, reclaim all its resources and free its memory */
5299kmp_team_t *
5300__kmp_reap_team( kmp_team_t *team )
5301{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005302 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005303
5304 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005305 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5306 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5307 KMP_DEBUG_ASSERT( team->t.t_threads );
5308 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005309
5310 /* TODO clean the threads that are a part of this? */
5311
5312 /* free stuff */
5313
5314 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005315 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5316 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005317 __kmp_free( team );
5318
5319 KMP_MB();
5320 return next_pool;
5321}
5322
5323//
5324// Free the thread. Don't reap it, just place it on the pool of available
5325// threads.
5326//
5327// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5328// binding for the affinity mechanism to be useful.
5329//
5330// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5331// However, we want to avoid a potential performance problem by always
5332// scanning through the list to find the correct point at which to insert
5333// the thread (potential N**2 behavior). To do this we keep track of the
5334// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5335// With single-level parallelism, threads will always be added to the tail
5336// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5337// parallelism, all bets are off and we may need to scan through the entire
5338// free list.
5339//
5340// This change also has a potentially large performance benefit, for some
5341// applications. Previously, as threads were freed from the hot team, they
5342// would be placed back on the free list in inverse order. If the hot team
 5343// grew back to its original size, then the freed thread would be placed
5344// back on the hot team in reverse order. This could cause bad cache
5345// locality problems on programs where the size of the hot team regularly
5346// grew and shrunk.
5347//
5348// Now, for single-level parallelism, the OMP tid is alway == gtid.
5349//
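// For example, if the pool currently holds gtids {2, 3, 5} and the thread with gtid 4 is
// freed, the scan below starts from __kmp_thread_pool_insert_pt (or from the head of the
// list if the insert point is already past gtid 4) and links the thread in between 3 and 5,
// keeping the pool sorted by gtid.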
5350void
5351__kmp_free_thread( kmp_info_t *this_th )
5352{
5353 int gtid;
5354 kmp_info_t **scan;
5355
5356 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5357 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5358
5359 KMP_DEBUG_ASSERT( this_th );
5360
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005361 // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team).
5362 int b;
5363 kmp_balign_t *balign = this_th->th.th_bar;
5364 for (b=0; b<bs_last_barrier; ++b) {
5365 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5366 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5367 balign[b].bb.team = NULL;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005368 balign[b].bb.leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005369 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005370 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005371
Jim Cownie5e8470a2013-09-27 10:38:44 +00005372 /* put thread back on the free pool */
5373 TCW_PTR(this_th->th.th_team, NULL);
5374 TCW_PTR(this_th->th.th_root, NULL);
5375 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5376
5377 //
5378 // If the __kmp_thread_pool_insert_pt is already past the new insert
5379 // point, then we need to re-scan the entire list.
5380 //
5381 gtid = this_th->th.th_info.ds.ds_gtid;
5382 if ( __kmp_thread_pool_insert_pt != NULL ) {
5383 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5384 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5385 __kmp_thread_pool_insert_pt = NULL;
5386 }
5387 }
5388
5389 //
5390 // Scan down the list to find the place to insert the thread.
5391 // scan is the address of a link in the list, possibly the address of
5392 // __kmp_thread_pool itself.
5393 //
 5394// In the absence of nested parallelism, the for loop will have 0 iterations.
5395 //
5396 if ( __kmp_thread_pool_insert_pt != NULL ) {
5397 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5398 }
5399 else {
5400 scan = (kmp_info_t **)&__kmp_thread_pool;
5401 }
5402 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5403 scan = &( (*scan)->th.th_next_pool ) );
5404
5405 //
5406 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5407 // to its address.
5408 //
5409 TCW_PTR(this_th->th.th_next_pool, *scan);
5410 __kmp_thread_pool_insert_pt = *scan = this_th;
5411 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5412 || ( this_th->th.th_info.ds.ds_gtid
5413 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5414 TCW_4(this_th->th.th_in_pool, TRUE);
5415 __kmp_thread_pool_nth++;
5416
5417 TCW_4(__kmp_nth, __kmp_nth - 1);
5418
5419#ifdef KMP_ADJUST_BLOCKTIME
5420 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005421 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005422 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5423 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5424 if ( __kmp_nth <= __kmp_avail_proc ) {
5425 __kmp_zero_bt = FALSE;
5426 }
5427 }
5428#endif /* KMP_ADJUST_BLOCKTIME */
5429
5430 KMP_MB();
5431}
5432
Jim Cownie5e8470a2013-09-27 10:38:44 +00005433
Jim Cownie5e8470a2013-09-27 10:38:44 +00005434/* ------------------------------------------------------------------------ */
5435
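/*
 * Main loop executed by a worker thread: sleep at the fork barrier until woken with a team
 * assignment, run the team's microtask via t_invoke(), pass through the join barrier, and
 * repeat until global shutdown (__kmp_global.g.g_done) is signalled.
 */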
5436void *
5437__kmp_launch_thread( kmp_info_t *this_thr )
5438{
5439 int gtid = this_thr->th.th_info.ds.ds_gtid;
5440/* void *stack_data;*/
5441 kmp_team_t *(*volatile pteam);
5442
5443 KMP_MB();
5444 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5445
5446 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005447 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005448 }
5449
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005450#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005451 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005452 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5453 this_thr->th.ompt_thread_info.wait_id = 0;
5454 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005455 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005456 __ompt_thread_begin(ompt_thread_worker, gtid);
5457 }
5458 }
5459#endif
5460
Jim Cownie5e8470a2013-09-27 10:38:44 +00005461 /* This is the place where threads wait for work */
5462 while( ! TCR_4(__kmp_global.g.g_done) ) {
5463 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5464 KMP_MB();
5465
5466 /* wait for work to do */
5467 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5468
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005469#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005470 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005471 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5472 }
5473#endif
5474
Jim Cownie5e8470a2013-09-27 10:38:44 +00005475 /* No tid yet since not part of a team */
5476 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5477
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005478#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005479 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005480 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5481 }
5482#endif
5483
Jim Cownie5e8470a2013-09-27 10:38:44 +00005484 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5485
5486 /* have we been allocated? */
5487 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005488#if OMPT_SUPPORT
5489 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005490 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005491 if (ompt_enabled) {
5492 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005493 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005494 }
5495#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005496 /* we were just woken up, so run our new task */
5497 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5498 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005499 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5500 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005501
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005502 updateHWFPControl (*pteam);
5503
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005504#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005505 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005506 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005507 // Initialize OMPT task id for implicit task.
5508 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005509 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005510 }
5511#endif
5512
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005513 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005514 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5515 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005516 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005517 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005518 KMP_ASSERT( rc );
5519
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005520#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005521 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005522 /* no frame set while outside task */
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005523 task_info->frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005524
5525 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5526 }
5527#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005528 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005529 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5530 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005531 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005532 /* join barrier after parallel region */
5533 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005534#if OMPT_SUPPORT && OMPT_TRACE
5535 if (ompt_enabled) {
5536 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005537 // don't access *pteam here: it may have already been freed
5538 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005539 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5540 my_parallel_id, task_info->task_id);
5541 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005542 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005543 task_info->task_id = 0;
5544 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005545#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005546 }
5547 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005548 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005549
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005550#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005551 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005552 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5553 __ompt_thread_end(ompt_thread_worker, gtid);
5554 }
5555#endif
5556
Jonathan Peyton54127982015-11-04 21:37:48 +00005557 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005558 /* run the destructors for the threadprivate data for this thread */
5559 __kmp_common_destroy_gtid( gtid );
5560
5561 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5562 KMP_MB();
5563 return this_thr;
5564}
5565
5566/* ------------------------------------------------------------------------ */
5567/* ------------------------------------------------------------------------ */
5568
Jim Cownie5e8470a2013-09-27 10:38:44 +00005569void
5570__kmp_internal_end_dest( void *specific_gtid )
5571{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005572 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005573 #pragma warning( push )
5574 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5575 #endif
5576 // Make sure no significant bits are lost
5577 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005578 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005579 #pragma warning( pop )
5580 #endif
5581
5582 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
 5583    /* NOTE: the gtid is stored as gtid+1 in the thread-local storage
5584 * this is because 0 is reserved for the nothing-stored case */
5585
5586 /* josh: One reason for setting the gtid specific data even when it is being
5587 destroyed by pthread is to allow gtid lookup through thread specific data
5588 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5589 that gets executed in the call to __kmp_internal_end_thread, actually
5590 gets the gtid through the thread specific data. Setting it here seems
5591 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5592 to run smoothly.
5593 todo: get rid of this after we remove the dependence on
5594 __kmp_gtid_get_specific
5595 */
5596 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5597 __kmp_gtid_set_specific( gtid );
5598 #ifdef KMP_TDATA_GTID
5599 __kmp_gtid = gtid;
5600 #endif
5601 __kmp_internal_end_thread( gtid );
5602}
5603
Jonathan Peyton99016992015-05-26 17:32:53 +00005604#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005605
 5606// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005607// perfectly, but in the real libomp.so I have no evidence it is ever called. However, the -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005608// option in makefile.mk works fine.
5609
5610__attribute__(( destructor ))
5611void
5612__kmp_internal_end_dtor( void )
5613{
5614 __kmp_internal_end_atexit();
5615}
5616
5617void
5618__kmp_internal_end_fini( void )
5619{
5620 __kmp_internal_end_atexit();
5621}
5622
5623#endif
5624
5625/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5626void
5627__kmp_internal_end_atexit( void )
5628{
5629 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5630 /* [Windows]
5631 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5632 stat code that depends on thread specific data for gtid fails because that data becomes
5633 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5634 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5635 stat code and use __kmp_internal_end_library to cleanly shutdown the library.
5636
5637// TODO: Can some of this comment about GVS be removed?
5638 I suspect that the offending stat code is executed when the calling thread tries to
5639 clean up a dead root thread's data structures, resulting in GVS code trying to close
5640 the GVS structures for that thread, but since the stat code uses
5641 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5642 cleaning up itself instead of another thread, it gets confused. This happens because
5643 allowing a thread to unregister and cleanup another thread is a recent modification for
5644 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5645 thread may end up trying to unregister another thread only if thread death does not
5646 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5647 specific data destructor function to detect thread death. For Windows dynamic, there
5648 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5649 workaround is applicable only for Windows static stat library.
5650 */
5651 __kmp_internal_end_library( -1 );
5652 #if KMP_OS_WINDOWS
5653 __kmp_close_console();
5654 #endif
5655}
5656
5657static void
5658__kmp_reap_thread(
5659 kmp_info_t * thread,
5660 int is_root
5661) {
5662
Alp Toker8f2d3f02014-02-24 10:40:15 +00005663 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005664
5665 int gtid;
5666
5667 KMP_DEBUG_ASSERT( thread != NULL );
5668
5669 gtid = thread->th.th_info.ds.ds_gtid;
5670
5671 if ( ! is_root ) {
5672
5673 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5674 /* Assume the threads are at the fork barrier here */
5675 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5676 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00005677 ANNOTATE_HAPPENS_BEFORE(thread);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005678 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5679 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005680 }; // if
5681
Jim Cownie5e8470a2013-09-27 10:38:44 +00005682 // Terminate OS thread.
5683 __kmp_reap_worker( thread );
5684
5685 //
5686 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005687 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005688 //
5689 // There is a small timing hole here - if the worker thread was
 5690// just waking up after sleeping in the pool, had reset its
5691 // th_active_in_pool flag but not decremented the global counter
5692 // __kmp_thread_pool_active_nth yet, then the global counter
5693 // might not get updated.
5694 //
5695 // Currently, this can only happen as the library is unloaded,
5696 // so there are no harmful side effects.
5697 //
5698 if ( thread->th.th_active_in_pool ) {
5699 thread->th.th_active_in_pool = FALSE;
5700 KMP_TEST_THEN_DEC32(
5701 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5702 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5703 }
5704
5705 // Decrement # of [worker] threads in the pool.
5706 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5707 --__kmp_thread_pool_nth;
5708 }; // if
5709
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005710 __kmp_free_implicit_task(thread);
5711
Jim Cownie5e8470a2013-09-27 10:38:44 +00005712 // Free the fast memory for tasking
5713 #if USE_FAST_MEMORY
5714 __kmp_free_fast_memory( thread );
5715 #endif /* USE_FAST_MEMORY */
5716
5717 __kmp_suspend_uninitialize_thread( thread );
5718
5719 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5720 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5721
5722 -- __kmp_all_nth;
5723 // __kmp_nth was decremented when thread is added to the pool.
5724
5725#ifdef KMP_ADJUST_BLOCKTIME
5726 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005727 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005728 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5729 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5730 if ( __kmp_nth <= __kmp_avail_proc ) {
5731 __kmp_zero_bt = FALSE;
5732 }
5733 }
5734#endif /* KMP_ADJUST_BLOCKTIME */
5735
5736 /* free the memory being used */
5737 if( __kmp_env_consistency_check ) {
5738 if ( thread->th.th_cons ) {
5739 __kmp_free_cons_stack( thread->th.th_cons );
5740 thread->th.th_cons = NULL;
5741 }; // if
5742 }
5743
5744 if ( thread->th.th_pri_common != NULL ) {
5745 __kmp_free( thread->th.th_pri_common );
5746 thread->th.th_pri_common = NULL;
5747 }; // if
5748
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005749 if (thread->th.th_task_state_memo_stack != NULL) {
5750 __kmp_free(thread->th.th_task_state_memo_stack);
5751 thread->th.th_task_state_memo_stack = NULL;
5752 }
5753
Jim Cownie5e8470a2013-09-27 10:38:44 +00005754 #if KMP_USE_BGET
5755 if ( thread->th.th_local.bget_data != NULL ) {
5756 __kmp_finalize_bget( thread );
5757 }; // if
5758 #endif
5759
Alp Toker98758b02014-03-02 04:12:06 +00005760#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005761 if ( thread->th.th_affin_mask != NULL ) {
5762 KMP_CPU_FREE( thread->th.th_affin_mask );
5763 thread->th.th_affin_mask = NULL;
5764 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005765#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005766
5767 __kmp_reap_team( thread->th.th_serial_team );
5768 thread->th.th_serial_team = NULL;
5769 __kmp_free( thread );
5770
5771 KMP_MB();
5772
5773} // __kmp_reap_thread
5774
5775static void
5776__kmp_internal_end(void)
5777{
5778 int i;
5779
5780 /* First, unregister the library */
5781 __kmp_unregister_library();
5782
5783 #if KMP_OS_WINDOWS
5784 /* In Win static library, we can't tell when a root actually dies, so we
5785 reclaim the data structures for any root threads that have died but not
5786 unregistered themselves, in order to shut down cleanly.
5787 In Win dynamic library we also can't tell when a thread dies.
5788 */
5789 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5790 #endif
5791
5792 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5793 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005794 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005795 break;
5796 KMP_MB(); /* Flush all pending memory write invalidates. */
5797 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
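    // If the loop above found a root that is still active (i < __kmp_threads_capacity), full
    // cleanup is skipped and only the monitor thread (if one is used) is reaped; otherwise all
    // pooled worker threads, teams and task teams are reaped below.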
5798
5799 if ( i < __kmp_threads_capacity ) {
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005800#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005801 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5802 KMP_MB(); /* Flush all pending memory write invalidates. */
5803
5804 //
5805 // Need to check that monitor was initialized before reaping it.
5806 // If we are called form __kmp_atfork_child (which sets
5807 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5808 // contain valid data, but it is only valid in the parent process,
5809 // not the child.
5810 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005811 // New behavior (201008): instead of keying off of the flag
5812 // __kmp_init_parallel, the monitor thread creation is keyed off
5813 // of the new flag __kmp_init_monitor.
5814 //
5815 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5816 if ( TCR_4( __kmp_init_monitor ) ) {
5817 __kmp_reap_monitor( & __kmp_monitor );
5818 TCW_4( __kmp_init_monitor, 0 );
5819 }
5820 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5821 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005822#endif // KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005823 } else {
5824 /* TODO move this to cleanup code */
5825 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005826 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005827 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5828 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005829// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005830 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005831 }
5832 }
5833 #endif
5834
5835 KMP_MB();
5836
5837 // Reap the worker threads.
5838 // This is valid for now, but be careful if threads are reaped sooner.
 5839        while ( __kmp_thread_pool != NULL ) {    // Loop thru all the threads in the pool.
5840 // Get the next thread from the pool.
5841 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5842 __kmp_thread_pool = thread->th.th_next_pool;
5843 // Reap it.
5844 thread->th.th_next_pool = NULL;
5845 thread->th.th_in_pool = FALSE;
5846 __kmp_reap_thread( thread, 0 );
5847 }; // while
5848 __kmp_thread_pool_insert_pt = NULL;
5849
5850 // Reap teams.
5851 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
5852 // Get the next team from the pool.
5853 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5854 __kmp_team_pool = team->t.t_next_pool;
5855 // Reap it.
5856 team->t.t_next_pool = NULL;
5857 __kmp_reap_team( team );
5858 }; // while
5859
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005860 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005861
5862 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5863 // TBD: Add some checking...
5864 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5865 }
5866
5867 /* Make sure all threadprivate destructors get run by joining with all worker
5868 threads before resetting this flag */
5869 TCW_SYNC_4(__kmp_init_common, FALSE);
5870
5871 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5872 KMP_MB();
5873
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005874#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005875 //
5876 // See note above: One of the possible fixes for CQ138434 / CQ140126
5877 //
5878 // FIXME: push both code fragments down and CSE them?
5879 // push them into __kmp_cleanup() ?
5880 //
5881 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5882 if ( TCR_4( __kmp_init_monitor ) ) {
5883 __kmp_reap_monitor( & __kmp_monitor );
5884 TCW_4( __kmp_init_monitor, 0 );
5885 }
5886 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5887 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005888#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005889 } /* else !__kmp_global.t_active */
5890 TCW_4(__kmp_init_gtid, FALSE);
5891 KMP_MB(); /* Flush all pending memory write invalidates. */
5892
Jim Cownie5e8470a2013-09-27 10:38:44 +00005893 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005894#if OMPT_SUPPORT
5895 ompt_fini();
5896#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005897}
5898
5899void
5900__kmp_internal_end_library( int gtid_req )
5901{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005902 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5903 /* this shouldn't be a race condition because __kmp_internal_end() is the
5904 * only place to clear __kmp_serial_init */
5905 /* we'll check this later too, after we get the lock */
 5906    // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5907 // because the next check will work in any case.
5908 if( __kmp_global.g.g_abort ) {
5909 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5910 /* TODO abort? */
5911 return;
5912 }
5913 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5914 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5915 return;
5916 }
5917
5918
5919 KMP_MB(); /* Flush all pending memory write invalidates. */
5920
5921 /* find out who we are and what we should do */
5922 {
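        // gtid_req < 0 means the caller does not know its own gtid; look it up from
        // thread-specific data. The negative sentinel values distinguish the cases:
        // system already shut down, monitor thread, or a thread that was never registered.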
5923 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5924 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5925 if( gtid == KMP_GTID_SHUTDOWN ) {
5926 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5927 return;
5928 } else if( gtid == KMP_GTID_MONITOR ) {
5929 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5930 return;
5931 } else if( gtid == KMP_GTID_DNE ) {
5932 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
 5933            /* we don't know who we are, but we may still shut down the library */
5934 } else if( KMP_UBER_GTID( gtid )) {
5935 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005936 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005937 __kmp_global.g.g_abort = -1;
5938 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5939 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5940 return;
5941 } else {
5942 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5943 __kmp_unregister_root_current_thread( gtid );
5944 }
5945 } else {
5946 /* worker threads may call this function through the atexit handler, if they call exit() */
5947 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5948 TODO: do a thorough shutdown instead
5949 */
5950 #ifdef DUMP_DEBUG_ON_EXIT
5951 if ( __kmp_debug_buf )
5952 __kmp_dump_debug_buffer( );
5953 #endif
5954 return;
5955 }
5956 }
5957 /* synchronize the termination process */
5958 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5959
5960 /* have we already finished */
5961 if( __kmp_global.g.g_abort ) {
5962 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5963 /* TODO abort? */
5964 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5965 return;
5966 }
5967 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5968 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5969 return;
5970 }
5971
5972 /* We need this lock to enforce mutex between this reading of
5973 __kmp_threads_capacity and the writing by __kmp_register_root.
5974 Alternatively, we can use a counter of roots that is
5975 atomically updated by __kmp_get_global_thread_id_reg,
5976 __kmp_do_serial_initialize and __kmp_internal_end_*.
5977 */
5978 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5979
5980 /* now we can safely conduct the actual termination */
5981 __kmp_internal_end();
5982
5983 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5984 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5985
5986 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
5987
5988 #ifdef DUMP_DEBUG_ON_EXIT
5989 if ( __kmp_debug_buf )
5990 __kmp_dump_debug_buffer();
5991 #endif
5992
5993 #if KMP_OS_WINDOWS
5994 __kmp_close_console();
5995 #endif
5996
5997 __kmp_fini_allocator();
5998
5999} // __kmp_internal_end_library
6000
6001void
6002__kmp_internal_end_thread( int gtid_req )
6003{
6004 int i;
6005
6006 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6007 /* this shouldn't be a race condition because __kmp_internal_end() is the
6008 * only place to clear __kmp_serial_init */
6009 /* we'll check this later too, after we get the lock */
6010 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
6011 // because the next check will work in any case.
6012 if( __kmp_global.g.g_abort ) {
6013 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
6014 /* TODO abort? */
6015 return;
6016 }
6017 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6018 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
6019 return;
6020 }
6021
6022 KMP_MB(); /* Flush all pending memory write invalidates. */
6023
6024 /* find out who we are and what we should do */
6025 {
6026 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6027 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6028 if( gtid == KMP_GTID_SHUTDOWN ) {
6029 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6030 return;
6031 } else if( gtid == KMP_GTID_MONITOR ) {
6032 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6033 return;
6034 } else if( gtid == KMP_GTID_DNE ) {
6035 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6036 return;
6037 /* we don't know who we are */
6038 } else if( KMP_UBER_GTID( gtid )) {
6039 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006040 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006041 __kmp_global.g.g_abort = -1;
6042 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6043 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6044 return;
6045 } else {
6046 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6047 __kmp_unregister_root_current_thread( gtid );
6048 }
6049 } else {
6050 /* just a worker thread, let's leave */
6051 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6052
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006053 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006054 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006055 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006056
6057 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6058 return;
6059 }
6060 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006061 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006062    // AC: let's not shut down the Linux* OS dynamic library at the exit of the uber thread,
 6063    // because it is better to shut it down later, in the library destructor.
 6064    // The reason for this change is a performance problem that occurs when a non-OpenMP thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006065    // in a loop forks and joins many OpenMP threads. We can save a lot of time
6066 // keeping worker threads alive until the program shutdown.
6067 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6068 // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006069 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006070 return;
6071 #endif
6072 /* synchronize the termination process */
6073 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6074
6075 /* have we already finished */
6076 if( __kmp_global.g.g_abort ) {
6077 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6078 /* TODO abort? */
6079 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6080 return;
6081 }
6082 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6083 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6084 return;
6085 }
6086
6087 /* We need this lock to enforce mutex between this reading of
6088 __kmp_threads_capacity and the writing by __kmp_register_root.
6089 Alternatively, we can use a counter of roots that is
6090 atomically updated by __kmp_get_global_thread_id_reg,
6091 __kmp_do_serial_initialize and __kmp_internal_end_*.
6092 */
6093
6094 /* should we finish the run-time? are all siblings done? */
6095 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6096
6097 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6098 if ( KMP_UBER_GTID( i ) ) {
6099 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6100 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6101 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6102 return;
6103 };
6104 }
6105
6106 /* now we can safely conduct the actual termination */
6107
6108 __kmp_internal_end();
6109
6110 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6111 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6112
Jim Cownie77c2a632014-09-03 11:34:33 +00006113 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006114
6115 #ifdef DUMP_DEBUG_ON_EXIT
6116 if ( __kmp_debug_buf )
6117 __kmp_dump_debug_buffer();
6118 #endif
6119} // __kmp_internal_end_thread
6120
6121// -------------------------------------------------------------------------------------------------
6122// Library registration stuff.
6123
6124static long __kmp_registration_flag = 0;
6125 // Random value used to indicate library initialization.
6126static char * __kmp_registration_str = NULL;
6127 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6128
6129
6130static inline
6131char *
6132__kmp_reg_status_name() {
6133 /*
6134 On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
 6135        If registration and unregistration happen in different threads (omp_misc_other_root_exit.cpp test case),
 6136        the registered_lib_env env var cannot be found, because its name will contain a different pid.
6137 */
6138 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
 6139} // __kmp_reg_status_name
6140
6141
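/*
 * Registration protocol (as implemented below): the library publishes the string
 * "<flag address>-<flag value>-<library file>" in the environment variable
 * __KMP_REGISTERED_LIB_<pid>. If that variable is already set, another copy of the runtime
 * is (or was) loaded in this process; the stored address/value pair is then probed to decide
 * whether that copy is still alive, and a fatal error is issued unless KMP_DUPLICATE_LIB_OK
 * allows duplicates. A stale value left by a dead copy is cleared and registration is retried.
 */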
6142void
6143__kmp_register_library_startup(
6144 void
6145) {
6146
6147 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6148 int done = 0;
6149 union {
6150 double dtime;
6151 long ltime;
6152 } time;
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006153 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie5e8470a2013-09-27 10:38:44 +00006154 __kmp_initialize_system_tick();
6155 #endif
6156 __kmp_read_system_time( & time.dtime );
6157 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6158 __kmp_registration_str =
6159 __kmp_str_format(
6160 "%p-%lx-%s",
6161 & __kmp_registration_flag,
6162 __kmp_registration_flag,
6163 KMP_LIBRARY_FILE
6164 );
6165
6166 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6167
6168 while ( ! done ) {
6169
6170 char * value = NULL; // Actual value of the environment variable.
6171
 6172        // Set the environment variable, but do not overwrite it if it already exists.
6173 __kmp_env_set( name, __kmp_registration_str, 0 );
 6174        // Check whether the variable was actually written.
6175 value = __kmp_env_get( name );
6176 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6177
6178 done = 1; // Ok, environment variable set successfully, exit the loop.
6179
6180 } else {
6181
6182 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
 6183            // Check whether it is alive or dead.
6184 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6185 char * tail = value;
6186 char * flag_addr_str = NULL;
6187 char * flag_val_str = NULL;
6188 char const * file_name = NULL;
6189 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6190 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6191 file_name = tail;
6192 if ( tail != NULL ) {
6193 long * flag_addr = 0;
6194 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006195 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6196 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006197 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6198 // First, check whether environment-encoded address is mapped into addr space.
6199 // If so, dereference it to see if it still has the right value.
6200
6201 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6202 neighbor = 1;
6203 } else {
6204 // If not, then we know the other copy of the library is no longer running.
6205 neighbor = 2;
6206 }; // if
6207 }; // if
6208 }; // if
6209 switch ( neighbor ) {
6210 case 0 : // Cannot parse environment variable -- neighbor status unknown.
 6211                    // Assume it is an incompatible format from a future version of the library.
6212 // Assume the other library is alive.
6213 // WARN( ... ); // TODO: Issue a warning.
6214 file_name = "unknown library";
 6215                    // Attention! Falling through to the next case. That's intentional.
6216 case 1 : { // Neighbor is alive.
6217 // Check it is allowed.
6218 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6219 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6220 // That's not allowed. Issue fatal error.
6221 __kmp_msg(
6222 kmp_ms_fatal,
6223 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6224 KMP_HNT( DuplicateLibrary ),
6225 __kmp_msg_null
6226 );
6227 }; // if
6228 KMP_INTERNAL_FREE( duplicate_ok );
6229 __kmp_duplicate_library_ok = 1;
6230 done = 1; // Exit the loop.
6231 } break;
6232 case 2 : { // Neighbor is dead.
6233 // Clear the variable and try to register library again.
6234 __kmp_env_unset( name );
6235 } break;
6236 default : {
6237 KMP_DEBUG_ASSERT( 0 );
6238 } break;
6239 }; // switch
6240
6241 }; // if
6242 KMP_INTERNAL_FREE( (void *) value );
6243
6244 }; // while
6245 KMP_INTERNAL_FREE( (void *) name );
6246
6247} // func __kmp_register_library_startup
6248
6249
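/*
 * Remove __KMP_REGISTERED_LIB_<pid> only if it still holds our own registration string;
 * a different value presumably belongs to another copy of the runtime, so it is left alone.
 */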
6250void
6251__kmp_unregister_library( void ) {
6252
6253 char * name = __kmp_reg_status_name();
6254 char * value = __kmp_env_get( name );
6255
6256 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6257 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6258 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6259 // Ok, this is our variable. Delete it.
6260 __kmp_env_unset( name );
6261 }; // if
6262
6263 KMP_INTERNAL_FREE( __kmp_registration_str );
6264 KMP_INTERNAL_FREE( value );
6265 KMP_INTERNAL_FREE( name );
6266
6267 __kmp_registration_flag = 0;
6268 __kmp_registration_str = NULL;
6269
6270} // __kmp_unregister_library
6271
6272
6273// End of Library registration stuff.
6274// -------------------------------------------------------------------------------------------------
6275
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006276#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6277
6278static void __kmp_check_mic_type()
6279{
6280 kmp_cpuid_t cpuid_state = {0};
6281 kmp_cpuid_t * cs_p = &cpuid_state;
Jonathan Peyton7be075332015-06-22 15:53:50 +00006282 __kmp_x86_cpuid(1, 0, cs_p);
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006283 // We don't support mic1 at the moment
6284 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6285 __kmp_mic_type = mic2;
6286 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6287 __kmp_mic_type = mic3;
6288 } else {
6289 __kmp_mic_type = non_mic;
6290 }
6291}
6292
6293#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
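
/*
 * Illustrative sketch (standalone, not compiled into the runtime): the EAX signature
 * test applied by __kmp_check_mic_type() above, factored into a pure function so it
 * can be exercised without executing CPUID. The local enum merely mirrors the
 * non_mic/mic2/mic3 values used by the runtime.
 */
#if 0
enum example_mic_type { example_non_mic, example_mic2, example_mic3 };

static enum example_mic_type example_classify_mic( unsigned int cpuid_leaf1_eax )
{
    if ( ( cpuid_leaf1_eax & 0xff0 ) == 0xB10 )       /* KNC (mic2) signature */
        return example_mic2;
    if ( ( cpuid_leaf1_eax & 0xf0ff0 ) == 0x50670 )   /* KNL (mic3) signature */
        return example_mic3;
    return example_non_mic;
}
#endif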
6294
Jim Cownie5e8470a2013-09-27 10:38:44 +00006295static void
6296__kmp_do_serial_initialize( void )
6297{
6298 int i, gtid;
6299 int size;
6300
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006301 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006302
6303 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6304 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6305 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6306 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6307 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6308
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006309#if OMPT_SUPPORT
6310 ompt_pre_init();
6311#endif
6312
Jim Cownie5e8470a2013-09-27 10:38:44 +00006313 __kmp_validate_locks();
6314
6315 /* Initialize internal memory allocator */
6316 __kmp_init_allocator();
6317
6318 /* Register the library startup via an environment variable
6319 and check to see whether another copy of the library is already
6320 registered. */
6321
6322 __kmp_register_library_startup( );
6323
6324 /* TODO reinitialization of library */
6325 if( TCR_4(__kmp_global.g.g_done) ) {
6326 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6327 }
6328
6329 __kmp_global.g.g_abort = 0;
6330 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6331
6332 /* initialize the locks */
6333#if KMP_USE_ADAPTIVE_LOCKS
6334#if KMP_DEBUG_ADAPTIVE_LOCKS
6335 __kmp_init_speculative_stats();
6336#endif
6337#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006338#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00006339 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006340#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006341 __kmp_init_lock( & __kmp_global_lock );
6342 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6343 __kmp_init_lock( & __kmp_debug_lock );
6344 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6345 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6346 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6347 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6348 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6349 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6350 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6351 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6352 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6353 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6354 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6355 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6356 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6357 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6358 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006359#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006360 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006361#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006362 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6363
6364 /* conduct initialization and initial setup of configuration */
6365
6366 __kmp_runtime_initialize();
6367
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006368#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6369 __kmp_check_mic_type();
6370#endif
6371
Jim Cownie5e8470a2013-09-27 10:38:44 +00006372 // Some global variable initialization moved here from kmp_env_initialize()
6373#ifdef KMP_DEBUG
6374 kmp_diag = 0;
6375#endif
6376 __kmp_abort_delay = 0;
6377
6378 // From __kmp_init_dflt_team_nth()
6379 /* assume the entire machine will be used */
6380 __kmp_dflt_team_nth_ub = __kmp_xproc;
6381 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6382 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6383 }
6384 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6385 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6386 }
6387 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006388
6389 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6390 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006391#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006392 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6393 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006394#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006395 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6396 __kmp_library = library_throughput;
6397 // From KMP_SCHEDULE initialization
6398 __kmp_static = kmp_sch_static_balanced;
6399 // AC: do not use analytical here, because it is non-monotonic
6400 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006401 //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006402 // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method
6403 // control parts
6404 #if KMP_FAST_REDUCTION_BARRIER
6405 #define kmp_reduction_barrier_gather_bb ((int)1)
6406 #define kmp_reduction_barrier_release_bb ((int)1)
6407 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6408 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6409 #endif // KMP_FAST_REDUCTION_BARRIER
6410 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6411 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6412 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6413 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6414 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6415 #if KMP_FAST_REDUCTION_BARRIER
6416 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6417 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6418 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6419 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6420 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6421 }
6422 #endif // KMP_FAST_REDUCTION_BARRIER
6423 }
6424 #if KMP_FAST_REDUCTION_BARRIER
6425 #undef kmp_reduction_barrier_release_pat
6426 #undef kmp_reduction_barrier_gather_pat
6427 #undef kmp_reduction_barrier_release_bb
6428 #undef kmp_reduction_barrier_gather_bb
6429 #endif // KMP_FAST_REDUCTION_BARRIER
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006430#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006431 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006432 // AC: plain=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006433 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006434 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6435 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6436 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6437 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006438#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006439 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006440 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6441 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6442 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006443#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006444#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006445
6446 // From KMP_CHECKS initialization
6447#ifdef KMP_DEBUG
6448 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6449#else
6450 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6451#endif
6452
6453 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6454 __kmp_foreign_tp = TRUE;
6455
6456 __kmp_global.g.g_dynamic = FALSE;
6457 __kmp_global.g.g_dynamic_mode = dynamic_default;
6458
6459 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006460
Jim Cownie5e8470a2013-09-27 10:38:44 +00006461 // Print all messages in message catalog for testing purposes.
6462 #ifdef KMP_DEBUG
6463 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6464 if ( __kmp_str_match_true( val ) ) {
6465 kmp_str_buf_t buffer;
6466 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006467 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006468 __kmp_printf( "%s", buffer.str );
6469 __kmp_str_buf_free( & buffer );
6470 }; // if
6471 __kmp_env_free( & val );
6472 #endif
6473
Jim Cownie181b4bb2013-12-23 17:28:57 +00006474 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006475 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6476 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6477
Jim Cownie5e8470a2013-09-27 10:38:44 +00006478 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6479 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6480 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6481 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6482 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6483 __kmp_thread_pool = NULL;
6484 __kmp_thread_pool_insert_pt = NULL;
6485 __kmp_team_pool = NULL;
6486
6487 /* Allocate all of the variable sized records */
6488 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6489 /* Since allocation is cache-aligned, just add extra padding at the end */
6490 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6491 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6492 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6493
6494 /* init thread counts */
6495 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
6496 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
6497 __kmp_all_nth = 0;
6498 __kmp_nth = 0;
6499
6500 /* setup the uber master thread and hierarchy */
6501 gtid = __kmp_register_root( TRUE );
6502 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6503 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6504 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6505
6506 KMP_MB(); /* Flush all pending memory write invalidates. */
6507
6508 __kmp_common_initialize();
6509
6510 #if KMP_OS_UNIX
6511 /* invoke the child fork handler */
6512 __kmp_register_atfork();
6513 #endif
6514
Jonathan Peyton99016992015-05-26 17:32:53 +00006515 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006516 {
6517 /* Invoke the exit handler when the program finishes, only for static library.
6518 For dynamic library, we already have _fini and DllMain.
6519 */
6520 int rc = atexit( __kmp_internal_end_atexit );
6521 if ( rc != 0 ) {
6522 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6523 }; // if
6524 }
6525 #endif
6526
6527 #if KMP_HANDLE_SIGNALS
6528 #if KMP_OS_UNIX
6529 /* NOTE: make sure that this is called before the user installs
6530 * their own signal handlers so that the user handlers
6531 * are called first. This way they can return false,
6532 * not call our handler, avoid terminating the library,
6533 * and continue execution where they left off. */
6534 __kmp_install_signals( FALSE );
6535 #endif /* KMP_OS_UNIX */
6536 #if KMP_OS_WINDOWS
6537 __kmp_install_signals( TRUE );
6538 #endif /* KMP_OS_WINDOWS */
6539 #endif
6540
6541 /* we have finished the serial initialization */
6542 __kmp_init_counter ++;
6543
6544 __kmp_init_serial = TRUE;
6545
6546 if (__kmp_settings) {
6547 __kmp_env_print();
6548 }
6549
6550#if OMP_40_ENABLED
6551 if (__kmp_display_env || __kmp_display_env_verbose) {
6552 __kmp_env_print_2();
6553 }
6554#endif // OMP_40_ENABLED
6555
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006556#if OMPT_SUPPORT
6557 ompt_post_init();
6558#endif
6559
Jim Cownie5e8470a2013-09-27 10:38:44 +00006560 KMP_MB();
6561
6562 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6563}
6564
6565void
6566__kmp_serial_initialize( void )
6567{
6568 if ( __kmp_init_serial ) {
6569 return;
6570 }
6571 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6572 if ( __kmp_init_serial ) {
6573 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6574 return;
6575 }
6576 __kmp_do_serial_initialize();
6577 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6578}
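
/*
 * Illustrative sketch (standalone, not compiled into the runtime): the check / lock /
 * re-check shape used by __kmp_serial_initialize() above and by the middle and
 * parallel initializers below, expressed with a plain pthread mutex instead of the
 * runtime's bootstrap locks. All names are placeholders, not runtime symbols.
 */
#if 0
#include <pthread.h>

static pthread_mutex_t example_init_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile int    example_init_done = 0;

static void example_do_initialize( void ) { /* one-time setup would go here */ }

static void example_lazy_initialize( void )
{
    if ( example_init_done )                 /* fast path: already initialized */
        return;
    pthread_mutex_lock( &example_init_lock );
    if ( ! example_init_done ) {             /* re-check: another thread may have won */
        example_do_initialize();
        example_init_done = 1;
    }
    pthread_mutex_unlock( &example_init_lock );
}
#endif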
6579
6580static void
6581__kmp_do_middle_initialize( void )
6582{
6583 int i, j;
6584 int prev_dflt_team_nth;
6585
6586 if( !__kmp_init_serial ) {
6587 __kmp_do_serial_initialize();
6588 }
6589
6590 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6591
6592 //
6593 // Save the previous value for the __kmp_dflt_team_nth so that
6594 // we can avoid some reinitialization if it hasn't changed.
6595 //
6596 prev_dflt_team_nth = __kmp_dflt_team_nth;
6597
Alp Toker98758b02014-03-02 04:12:06 +00006598#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006599 //
6600 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6601 // number of cores on the machine.
6602 //
6603 __kmp_affinity_initialize();
6604
6605 //
6606 // Run through the __kmp_threads array and set the affinity mask
6607 // for each root thread that is currently registered with the RTL.
6608 //
6609 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6610 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6611 __kmp_affinity_set_init_mask( i, TRUE );
6612 }
6613 }
Alp Toker98758b02014-03-02 04:12:06 +00006614#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006615
6616 KMP_ASSERT( __kmp_xproc > 0 );
6617 if ( __kmp_avail_proc == 0 ) {
6618 __kmp_avail_proc = __kmp_xproc;
6619 }
6620
6621 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
6622 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006623 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006624 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6625 j++;
6626 }
6627
6628 if ( __kmp_dflt_team_nth == 0 ) {
6629#ifdef KMP_DFLT_NTH_CORES
6630 //
6631 // Default #threads = #cores
6632 //
6633 __kmp_dflt_team_nth = __kmp_ncores;
6634 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6635 __kmp_dflt_team_nth ) );
6636#else
6637 //
6638 // Default #threads = #available OS procs
6639 //
6640 __kmp_dflt_team_nth = __kmp_avail_proc;
6641 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6642 __kmp_dflt_team_nth ) );
6643#endif /* KMP_DFLT_NTH_CORES */
6644 }
6645
6646 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6647 __kmp_dflt_team_nth = KMP_MIN_NTH;
6648 }
6649 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6650 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6651 }
6652
6653 //
6654 // There's no harm in continuing if the following check fails,
6655 // but it indicates an error in the previous logic.
6656 //
6657 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6658
6659 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6660 //
6661 // Run through the __kmp_threads array and set the num threads icv
6662 // for each root thread that is currently registered with the RTL
6663 // (which has not already explicitly set its nthreads-var with a
6664 // call to omp_set_num_threads()).
6665 //
6666 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6667 kmp_info_t *thread = __kmp_threads[ i ];
6668 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006669 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006670
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006671 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006672 }
6673 }
6674 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6675 __kmp_dflt_team_nth) );
6676
6677#ifdef KMP_ADJUST_BLOCKTIME
6678 /* Adjust blocktime to zero if necessary */
6679 /* now that __kmp_avail_proc is set */
6680 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6681 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6682 if ( __kmp_nth > __kmp_avail_proc ) {
6683 __kmp_zero_bt = TRUE;
6684 }
6685 }
6686#endif /* KMP_ADJUST_BLOCKTIME */
6687
6688 /* we have finished middle initialization */
6689 TCW_SYNC_4(__kmp_init_middle, TRUE);
6690
6691 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6692}
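
/*
 * Illustrative sketch (standalone, not compiled into the runtime): the default team
 * size choice made by __kmp_do_middle_initialize() above -- #cores when
 * KMP_DFLT_NTH_CORES is defined, otherwise #available procs, clamped to the
 * [KMP_MIN_NTH, __kmp_sys_max_nth] range. Parameter names mirror, but are not,
 * the runtime globals.
 */
#if 0
static int example_default_team_nth( int ncores, int avail_proc,
                                     int min_nth, int sys_max_nth,
                                     int prefer_cores /* KMP_DFLT_NTH_CORES analogue */ )
{
    int nth = prefer_cores ? ncores : avail_proc;
    if ( nth < min_nth )     nth = min_nth;
    if ( nth > sys_max_nth ) nth = sys_max_nth;
    return nth;
}
#endif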
6693
6694void
6695__kmp_middle_initialize( void )
6696{
6697 if ( __kmp_init_middle ) {
6698 return;
6699 }
6700 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6701 if ( __kmp_init_middle ) {
6702 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6703 return;
6704 }
6705 __kmp_do_middle_initialize();
6706 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6707}
6708
6709void
6710__kmp_parallel_initialize( void )
6711{
6712 int gtid = __kmp_entry_gtid(); // this might be a new root
6713
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006714 /* synchronize parallel initialization (for sibling) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006715 if( TCR_4(__kmp_init_parallel) ) return;
6716 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6717 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6718
6719 /* TODO reinitialization after we have already shut down */
6720 if( TCR_4(__kmp_global.g.g_done) ) {
6721 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6722 __kmp_infinite_loop();
6723 }
6724
6725 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6726 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6727 */
6728 if( !__kmp_init_middle ) {
6729 __kmp_do_middle_initialize();
6730 }
6731
6732 /* begin initialization */
6733 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6734 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6735
6736#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6737 //
6738 // Save the FP control regs.
6739 // Worker threads will set theirs to these values at thread startup.
6740 //
6741 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6742 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6743 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6744#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6745
6746#if KMP_OS_UNIX
6747# if KMP_HANDLE_SIGNALS
6748 /* must be after __kmp_serial_initialize */
6749 __kmp_install_signals( TRUE );
6750# endif
6751#endif
6752
6753 __kmp_suspend_initialize();
6754
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006755#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006756 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6757 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6758 }
6759#else
6760 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6761 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6762 }
6763#endif
6764
6765 if ( __kmp_version ) {
6766 __kmp_print_version_2();
6767 }
6768
Jim Cownie5e8470a2013-09-27 10:38:44 +00006769 /* we have finished parallel initialization */
6770 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6771
6772 KMP_MB();
6773 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6774
6775 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6776}
6777
6778
6779/* ------------------------------------------------------------------------ */
6780
6781void
6782__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6783 kmp_team_t *team )
6784{
6785 kmp_disp_t *dispatch;
6786
6787 KMP_MB();
6788
6789 /* none of the threads have encountered any constructs, yet. */
6790 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006791#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006792 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006793#endif /* KMP_CACHE_MANAGE */
6794 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6795 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006796 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6797 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006798
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006799 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006800#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00006801 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6802#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006803 if( __kmp_env_consistency_check )
6804 __kmp_push_parallel( gtid, team->t.t_ident );
6805
6806 KMP_MB(); /* Flush all pending memory write invalidates. */
6807}
6808
6809void
6810__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6811 kmp_team_t *team )
6812{
6813 if( __kmp_env_consistency_check )
6814 __kmp_pop_parallel( gtid, team->t.t_ident );
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006815
6816 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006817}
6818
6819int
6820__kmp_invoke_task_func( int gtid )
6821{
6822 int rc;
6823 int tid = __kmp_tid_from_gtid( gtid );
6824 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006825 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006826
6827 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6828#if USE_ITT_BUILD
6829 if ( __itt_stack_caller_create_ptr ) {
6830 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6831 }
6832#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006833#if INCLUDE_SSC_MARKS
6834 SSC_MARK_INVOKING();
6835#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006836
6837#if OMPT_SUPPORT
6838 void *dummy;
6839 void **exit_runtime_p;
6840 ompt_task_id_t my_task_id;
6841 ompt_parallel_id_t my_parallel_id;
6842
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006843 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006844 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6845 ompt_task_info.frame.exit_runtime_frame);
6846 } else {
6847 exit_runtime_p = &dummy;
6848 }
6849
6850#if OMPT_TRACE
6851 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6852 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006853 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006854 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6855 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6856 my_parallel_id, my_task_id);
6857 }
6858#endif
6859#endif
6860
Jonathan Peyton45be4502015-08-11 21:36:41 +00006861 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006862 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6863 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006864 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6865 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006866#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006867 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006868#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006869 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006870#if OMPT_SUPPORT
6871 *exit_runtime_p = NULL;
6872#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006873 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006874
Jim Cownie5e8470a2013-09-27 10:38:44 +00006875#if USE_ITT_BUILD
6876 if ( __itt_stack_caller_create_ptr ) {
6877 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6878 }
6879#endif /* USE_ITT_BUILD */
6880 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6881
6882 return rc;
6883}
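
/*
 * Illustrative sketch (standalone, not compiled into the runtime): the "real slot or
 * local dummy" pointer used above for the OMPT exit_runtime_frame -- the stores stay
 * unconditional and only the destination depends on whether a tool is attached.
 * Names are placeholders; __builtin_frame_address() is a GCC/Clang builtin used here
 * only for illustration.
 */
#if 0
static void example_run_with_frame_record( void **tool_slot_or_null,
                                            void (*work)( void ) )
{
    void  *dummy;
    void **exit_frame_p = ( tool_slot_or_null != NULL ) ? tool_slot_or_null : &dummy;

    *exit_frame_p = __builtin_frame_address( 0 );  /* record the frame unconditionally */
    work();                                        /* the code whose frame is of interest */
    *exit_frame_p = NULL;                          /* clear it unconditionally afterwards */
}
#endif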
6884
6885#if OMP_40_ENABLED
6886void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006887__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006888{
6889 // This routine is called by all master threads in teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006890 kmp_info_t *thr = __kmp_threads[ gtid ];
6891 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006892 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006893 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6894 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6895 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006896 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006897 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006898 // Launch the league of teams now, but do not let the workers execute
6899 // (they hang on fork barrier until next parallel)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006900#if INCLUDE_SSC_MARKS
6901 SSC_MARK_FORKING();
6902#endif
6903 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006904 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006905#if OMPT_SUPPORT
6906 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6907#endif
6908 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006909 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6910 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006911#if INCLUDE_SSC_MARKS
6912 SSC_MARK_JOINING();
6913#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006914
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006915 // AC: last parameter "1" eliminates join barrier which won't work because
6916 // worker threads are in a fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006917 __kmp_join_call( loc, gtid
6918#if OMPT_SUPPORT
6919 , fork_context_intel
6920#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006921 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006922}
6923
6924int
6925__kmp_invoke_teams_master( int gtid )
6926{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006927 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6928 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006929 #if KMP_DEBUG
6930 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6931 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6932 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006933 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6934 __kmp_teams_master( gtid );
6935 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006936 return 1;
6937}
6938#endif /* OMP_40_ENABLED */
6939
6940/* this sets the requested number of threads for the next parallel region
6941 * encountered by this team */
6942/* since this should be enclosed in the forkjoin critical section it
6943 * should avoid race conditions with asymmetrical nested parallelism */
6944
6945void
6946__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6947{
6948 kmp_info_t *thr = __kmp_threads[gtid];
6949
6950 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006951 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006952}
6953
6954#if OMP_40_ENABLED
6955
6956/* this sets the requested number of teams for the teams region and/or
6957 * the number of threads for the next parallel region encountered */
6958void
6959__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6960{
6961 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006962 KMP_DEBUG_ASSERT(num_teams >= 0);
6963 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006964
6965 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006966 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006967 if( num_teams > __kmp_max_nth ) { // if too many teams requested?
6968 if ( !__kmp_reserve_warn ) {
6969 __kmp_reserve_warn = 1;
6970 __kmp_msg(
6971 kmp_ms_warning,
6972 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6973 KMP_HNT( Unset_ALL_THREADS ),
6974 __kmp_msg_null
6975 );
6976 }
6977 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006978 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006979 // Set number of teams (number of threads in the outer "parallel" of the teams)
6980 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6981
6982 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006983 if( num_threads == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006984 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006985 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006986 num_threads = __kmp_avail_proc / num_teams;
6987 if( num_teams * num_threads > __kmp_max_nth ) {
6988 // adjust num_threads w/o warning as it is not user setting
6989 num_threads = __kmp_max_nth / num_teams;
6990 }
6991 } else {
6992 if( num_teams * num_threads > __kmp_max_nth ) {
6993 int new_threads = __kmp_max_nth / num_teams;
6994 if ( !__kmp_reserve_warn ) { // user asked for too many threads
6995 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
6996 __kmp_msg(
6997 kmp_ms_warning,
6998 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6999 KMP_HNT( Unset_ALL_THREADS ),
7000 __kmp_msg_null
7001 );
7002 }
7003 num_threads = new_threads;
7004 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007005 }
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007006 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007007}
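
/*
 * Illustrative sketch (standalone, not compiled into the runtime): the thread-count
 * arithmetic performed by __kmp_push_num_teams() above, stripped of the warnings and
 * the lazy middle initialization. Parameter names mirror, but are not, the runtime
 * globals.
 */
#if 0
static int example_teams_thread_count( int num_teams, int num_threads,
                                       int avail_proc, int max_nth )
{
    if ( num_teams == 0 )
        num_teams = 1;                               /* default number of teams */
    if ( num_teams > max_nth )
        num_teams = max_nth;                         /* too many teams requested */
    if ( num_threads == 0 ) {
        num_threads = avail_proc / num_teams;        /* spread available procs over teams */
        if ( num_teams * num_threads > max_nth )
            num_threads = max_nth / num_teams;       /* silent adjustment, not a user value */
    } else if ( num_teams * num_threads > max_nth ) {
        num_threads = max_nth / num_teams;           /* user value trimmed to the limit */
    }
    return num_threads;
}
#endif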
7008
7009
7010//
7011// Set the proc_bind var to use in the following parallel region.
7012//
7013void
7014__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
7015{
7016 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007017 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007018}
7019
7020#endif /* OMP_40_ENABLED */
7021
7022/* Launch the worker threads into the microtask. */
7023
7024void
7025__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7026{
7027 kmp_info_t *this_thr = __kmp_threads[gtid];
7028
7029#ifdef KMP_DEBUG
7030 int f;
7031#endif /* KMP_DEBUG */
7032
7033 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007034 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007035 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7036 KMP_MB(); /* Flush all pending memory write invalidates. */
7037
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007038 team->t.t_construct = 0; /* no single directives seen yet */
7039 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007040
7041 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007042 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007043 if ( team->t.t_max_nproc > 1 ) {
7044 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00007045 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007046 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007047#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007048 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7049#endif
7050 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007051 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007052 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007053#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007054 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7055#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007056 }
7057
7058 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007059 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007060
7061#ifdef KMP_DEBUG
7062 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7063 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7064 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7065 }
7066#endif /* KMP_DEBUG */
7067
7068 /* release the worker threads so they may begin working */
7069 __kmp_fork_barrier( gtid, 0 );
7070}
7071
7072
7073void
7074__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7075{
7076 kmp_info_t *this_thr = __kmp_threads[gtid];
7077
7078 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007079 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007080 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7081 KMP_MB(); /* Flush all pending memory write invalidates. */
7082
7083 /* Join barrier after fork */
7084
7085#ifdef KMP_DEBUG
7086 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7087 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7088 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7089 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7090 __kmp_print_structure();
7091 }
7092 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7093 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7094#endif /* KMP_DEBUG */
7095
7096 __kmp_join_barrier( gtid ); /* wait for everyone */
7097
7098 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007099 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007100}
7101
7102
7103/* ------------------------------------------------------------------------ */
7104/* ------------------------------------------------------------------------ */
7105
7106#ifdef USE_LOAD_BALANCE
7107
7108//
7109 // Return the number of worker threads actively spinning in the hot team, if we
7110// are at the outermost level of parallelism. Otherwise, return 0.
7111//
7112static int
7113__kmp_active_hot_team_nproc( kmp_root_t *root )
7114{
7115 int i;
7116 int retval;
7117 kmp_team_t *hot_team;
7118
7119 if ( root->r.r_active ) {
7120 return 0;
7121 }
7122 hot_team = root->r.r_hot_team;
7123 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7124 return hot_team->t.t_nproc - 1; // Don't count master thread
7125 }
7126
7127 //
7128 // Skip the master thread - it is accounted for elsewhere.
7129 //
7130 retval = 0;
7131 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7132 if ( hot_team->t.t_threads[i]->th.th_active ) {
7133 retval++;
7134 }
7135 }
7136 return retval;
7137}
7138
7139//
7140// Perform an automatic adjustment to the number of
7141// threads used by the next parallel region.
7142//
7143static int
7144__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7145{
7146 int retval;
7147 int pool_active;
7148 int hot_team_active;
7149 int team_curr_active;
7150 int system_active;
7151
7152 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7153 root, set_nproc ) );
7154 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007155 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007156 KMP_DEBUG_ASSERT( set_nproc > 1 );
7157
7158 if ( set_nproc == 1) {
7159 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7160 return 1;
7161 }
7162
7163 //
7164 // Threads that are active in the thread pool, active in the hot team
7165 // for this particular root (if we are at the outer par level), and
7166 // the currently executing thread (to become the master) are available
7167 // to add to the new team, but are currently contributing to the system
7168 // load, and must be accounted for.
7169 //
7170 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7171 hot_team_active = __kmp_active_hot_team_nproc( root );
7172 team_curr_active = pool_active + hot_team_active + 1;
7173
7174 //
7175 // Check the system load.
7176 //
7177 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7178 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7179 system_active, pool_active, hot_team_active ) );
7180
7181 if ( system_active < 0 ) {
7182 //
7183 // There was an error reading the necessary info from /proc,
7184 // so use the thread limit algorithm instead. Once we set
7185 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7186 // we shouldn't wind up getting back here.
7187 //
7188 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7189 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7190
7191 //
7192 // Make this call behave like the thread limit algorithm.
7193 //
7194 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7195 : root->r.r_hot_team->t.t_nproc);
7196 if ( retval > set_nproc ) {
7197 retval = set_nproc;
7198 }
7199 if ( retval < KMP_MIN_NTH ) {
7200 retval = KMP_MIN_NTH;
7201 }
7202
7203 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7204 return retval;
7205 }
7206
7207 //
7208 // There is a slight delay in the load balance algorithm in detecting
7209 // new running procs. The real system load at this instant should be
7210 // at least as large as the number of active OMP threads that are available to
7211 // add to the team.
7212 //
7213 if ( system_active < team_curr_active ) {
7214 system_active = team_curr_active;
7215 }
7216 retval = __kmp_avail_proc - system_active + team_curr_active;
7217 if ( retval > set_nproc ) {
7218 retval = set_nproc;
7219 }
7220 if ( retval < KMP_MIN_NTH ) {
7221 retval = KMP_MIN_NTH;
7222 }
7223
7224 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7225 return retval;
7226} // __kmp_load_balance_nproc()
7227
7228#endif /* USE_LOAD_BALANCE */
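
/*
 * Illustrative sketch (standalone, not compiled into the runtime): the sizing formula
 * used by __kmp_load_balance_nproc() above. For example, avail_proc=16,
 * system_active=10 and team_curr_active=4 (3 reusable threads plus the forking
 * thread) give 16 - 10 + 4 = 10 threads, which is then capped by set_nproc and
 * floored at min_nth.
 */
#if 0
static int example_load_balance_nproc( int avail_proc, int system_active,
                                       int team_curr_active,
                                       int set_nproc, int min_nth )
{
    if ( system_active < team_curr_active )   /* /proc data can lag; never undercount */
        system_active = team_curr_active;
    int nproc = avail_proc - system_active + team_curr_active;
    if ( nproc > set_nproc ) nproc = set_nproc;
    if ( nproc < min_nth )   nproc = min_nth;
    return nproc;
}
#endif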
7229
Jim Cownie5e8470a2013-09-27 10:38:44 +00007230/* ------------------------------------------------------------------------ */
7231/* ------------------------------------------------------------------------ */
7232
7233/* NOTE: this is called with the __kmp_init_lock held */
7234void
7235__kmp_cleanup( void )
7236{
7237 int f;
7238
7239 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7240
7241 if (TCR_4(__kmp_init_parallel)) {
7242#if KMP_HANDLE_SIGNALS
7243 __kmp_remove_signals();
7244#endif
7245 TCW_4(__kmp_init_parallel, FALSE);
7246 }
7247
7248 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007249#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007250 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007251#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007252 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007253 TCW_4(__kmp_init_middle, FALSE);
7254 }
7255
7256 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7257
7258 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007259 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007260 __kmp_init_serial = FALSE;
7261 }
7262
7263 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7264 if ( __kmp_root[ f ] != NULL ) {
7265 __kmp_free( __kmp_root[ f ] );
7266 __kmp_root[ f ] = NULL;
7267 }
7268 }
7269 __kmp_free( __kmp_threads );
7270 // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in
7271 // freeing __kmp_root.
7272 __kmp_threads = NULL;
7273 __kmp_root = NULL;
7274 __kmp_threads_capacity = 0;
7275
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007276#if KMP_USE_DYNAMIC_LOCK
7277 __kmp_cleanup_indirect_user_locks();
7278#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007279 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007280#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007281
Alp Toker98758b02014-03-02 04:12:06 +00007282 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007283 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7284 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007285 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007286
7287 #if KMP_USE_ADAPTIVE_LOCKS
7288 #if KMP_DEBUG_ADAPTIVE_LOCKS
7289 __kmp_print_speculative_stats();
7290 #endif
7291 #endif
7292 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7293 __kmp_nested_nth.nth = NULL;
7294 __kmp_nested_nth.size = 0;
7295 __kmp_nested_nth.used = 0;
7296
7297 __kmp_i18n_catclose();
7298
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007299#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00007300 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007301#endif
7302
Jim Cownie5e8470a2013-09-27 10:38:44 +00007303 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7304}
7305
7306/* ------------------------------------------------------------------------ */
7307/* ------------------------------------------------------------------------ */
7308
7309int
7310__kmp_ignore_mppbeg( void )
7311{
7312 char *env;
7313
7314 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7315 if (__kmp_str_match_false( env ))
7316 return FALSE;
7317 }
7318 // By default __kmpc_begin() is no-op.
7319 return TRUE;
7320}
7321
7322int
7323__kmp_ignore_mppend( void )
7324{
7325 char *env;
7326
7327 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7328 if (__kmp_str_match_false( env ))
7329 return FALSE;
7330 }
7331 // By default __kmpc_end() is no-op.
7332 return TRUE;
7333}
7334
7335void
7336__kmp_internal_begin( void )
7337{
7338 int gtid;
7339 kmp_root_t *root;
7340
7341 /* this is a very important step as it will register new sibling threads
7342 * and assign these new uber threads a new gtid */
7343 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007344 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007345 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7346
7347 if( root->r.r_begin ) return;
7348 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7349 if( root->r.r_begin ) {
7350 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7351 return;
7352 }
7353
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007354 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007355
7356 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7357}
7358
7359
7360/* ------------------------------------------------------------------------ */
7361/* ------------------------------------------------------------------------ */
7362
7363void
7364__kmp_user_set_library (enum library_type arg)
7365{
7366 int gtid;
7367 kmp_root_t *root;
7368 kmp_info_t *thread;
7369
7370 /* first, make sure we are initialized so we can get our gtid */
7371
7372 gtid = __kmp_entry_gtid();
7373 thread = __kmp_threads[ gtid ];
7374
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007375 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007376
7377 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7378 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7379 KMP_WARNING( SetLibraryIncorrectCall );
7380 return;
7381 }
7382
7383 switch ( arg ) {
7384 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007385 thread->th.th_set_nproc = 0;
7386 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007387 break;
7388 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007389 thread->th.th_set_nproc = 0;
7390 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007391 break;
7392 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007393 thread->th.th_set_nproc = 0;
7394 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007395 break;
7396 default:
7397 KMP_FATAL( UnknownLibraryType, arg );
7398 }
7399
7400 __kmp_aux_set_library ( arg );
7401}
7402
7403void
7404__kmp_aux_set_stacksize( size_t arg )
7405{
7406 if (! __kmp_init_serial)
7407 __kmp_serial_initialize();
7408
7409#if KMP_OS_DARWIN
7410 if (arg & (0x1000 - 1)) {
7411 arg &= ~(0x1000 - 1);
7412 if(arg + 0x1000) /* check for overflow if we round up */
7413 arg += 0x1000;
7414 }
7415#endif
7416 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7417
7418 /* only change the default stacksize before the first parallel region */
7419 if (! TCR_4(__kmp_init_parallel)) {
7420 size_t value = arg; /* argument is in bytes */
7421
7422 if (value < __kmp_sys_min_stksize )
7423 value = __kmp_sys_min_stksize ;
7424 else if (value > KMP_MAX_STKSIZE)
7425 value = KMP_MAX_STKSIZE;
7426
7427 __kmp_stksize = value;
7428
7429 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7430 }
7431
7432 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7433}
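
/*
 * Illustrative sketch (standalone, not compiled into the runtime): the 4 KiB round-up
 * applied above for KMP_OS_DARWIN before the stack size is clamped; for example, a
 * request of 0x1234 bytes becomes 0x2000. The overflow guard of the original is
 * omitted for brevity.
 */
#if 0
#include <stddef.h>

static size_t example_round_up_to_page( size_t arg, size_t page /* e.g. 0x1000 */ )
{
    if ( arg & ( page - 1 ) ) {      /* not already page aligned ...       */
        arg &= ~( page - 1 );        /* ... round down to the page start   */
        arg += page;                 /* ... then bump to the next boundary */
    }
    return arg;
}
#endif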
7434
7435/* set the behaviour of the runtime library */
7436/* TODO this can cause some odd behaviour with sibling parallelism... */
7437void
7438__kmp_aux_set_library (enum library_type arg)
7439{
7440 __kmp_library = arg;
7441
7442 switch ( __kmp_library ) {
7443 case library_serial :
7444 {
7445 KMP_INFORM( LibraryIsSerial );
7446 (void) __kmp_change_library( TRUE );
7447 }
7448 break;
7449 case library_turnaround :
7450 (void) __kmp_change_library( TRUE );
7451 break;
7452 case library_throughput :
7453 (void) __kmp_change_library( FALSE );
7454 break;
7455 default:
7456 KMP_FATAL( UnknownLibraryType, arg );
7457 }
7458}
7459
7460/* ------------------------------------------------------------------------ */
7461/* ------------------------------------------------------------------------ */
7462
7463void
7464__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7465{
7466 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007467#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007468 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007469#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007470 int bt_set;
7471
7472 __kmp_save_internal_controls( thread );
7473
7474 /* Normalize and set blocktime for the teams */
7475 if (blocktime < KMP_MIN_BLOCKTIME)
7476 blocktime = KMP_MIN_BLOCKTIME;
7477 else if (blocktime > KMP_MAX_BLOCKTIME)
7478 blocktime = KMP_MAX_BLOCKTIME;
7479
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007480 set__blocktime_team( thread->th.th_team, tid, blocktime );
7481 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007482
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007483#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007484 /* Calculate and set blocktime intervals for the teams */
7485 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7486
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007487 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7488 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007489#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007490
7491 /* Set whether blocktime has been set to "TRUE" */
7492 bt_set = TRUE;
7493
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007494 set__bt_set_team( thread->th.th_team, tid, bt_set );
7495 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007496#if KMP_USE_MONITOR
Samuel Antao33515192016-10-20 13:20:17 +00007497 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
7498 "bt_intervals=%d, monitor_updates=%d\n",
7499 __kmp_gtid_from_tid(tid, thread->th.th_team),
7500 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7501 __kmp_monitor_wakeups));
7502#else
7503 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7504 __kmp_gtid_from_tid(tid, thread->th.th_team),
7505 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007506#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007507}
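
/*
 * Illustrative sketch (standalone, not compiled into the runtime): the clamp applied
 * above plus a blocktime(ms)-to-monitor-interval conversion. The
 * ceil(ms * wakeups_per_sec / 1000) formula is an assumption for illustration; the
 * actual definition of KMP_INTERVALS_FROM_BLOCKTIME() is not shown in this part of
 * the file.
 */
#if 0
static int example_normalize_blocktime( int blocktime_ms, int min_ms, int max_ms )
{
    if ( blocktime_ms < min_ms ) blocktime_ms = min_ms;
    if ( blocktime_ms > max_ms ) blocktime_ms = max_ms;
    return blocktime_ms;
}

static int example_blocktime_to_intervals( int blocktime_ms, int wakeups_per_sec )
{
    /* assumed conversion: how many monitor wakeups fit in the blocktime window */
    return ( blocktime_ms * wakeups_per_sec + 999 ) / 1000;
}
#endif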
7508
7509void
7510__kmp_aux_set_defaults(
7511 char const * str,
7512 int len
7513) {
7514 if ( ! __kmp_init_serial ) {
7515 __kmp_serial_initialize();
7516 };
7517 __kmp_env_initialize( str );
7518
7519 if (__kmp_settings
7520#if OMP_40_ENABLED
7521 || __kmp_display_env || __kmp_display_env_verbose
7522#endif // OMP_40_ENABLED
7523 ) {
7524 __kmp_env_print();
7525 }
7526} // __kmp_aux_set_defaults
7527
7528/* ------------------------------------------------------------------------ */
7529
7530/*
7531 * internal fast reduction routines
7532 */
7533
Jim Cownie5e8470a2013-09-27 10:38:44 +00007534PACKED_REDUCTION_METHOD_T
7535__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7536 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7537 kmp_critical_name *lck )
7538{
7539
7540 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7541 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7542 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
7543 // Finally, it's up to the OpenMP RTL to decide which method to select among those generated by PAROPT.
7544
7545 PACKED_REDUCTION_METHOD_T retval;
7546
7547 int team_size;
7548
7549 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7550 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7551
7552 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7553 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7554
7555 retval = critical_reduce_block;
7556
7557 team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic dereference ) is slower
7558
7559 if( team_size == 1 ) {
7560
7561 retval = empty_reduce_block;
7562
7563 } else {
7564
7565 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7566 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7567
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007568 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007569
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007570 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007571
7572 int teamsize_cutoff = 4;
7573
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007574#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7575 if( __kmp_mic_type != non_mic ) {
7576 teamsize_cutoff = 8;
7577 }
7578#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007579 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007580 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007581 if ( atomic_available ) {
7582 retval = atomic_reduce_block;
7583 }
7584 } else {
7585 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7586 }
7587 } else if ( atomic_available ) {
7588 retval = atomic_reduce_block;
7589 }
7590 #else
7591 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007592 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007593
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007594 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007595
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007596 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007597
Jim Cownie5e8470a2013-09-27 10:38:44 +00007598 // basic tuning
7599
7600 if( atomic_available ) {
7601 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7602 retval = atomic_reduce_block;
7603 }
7604 } // otherwise: use critical section
7605
7606 #elif KMP_OS_DARWIN
7607
Jim Cownie5e8470a2013-09-27 10:38:44 +00007608 if( atomic_available && ( num_vars <= 3 ) ) {
7609 retval = atomic_reduce_block;
7610 } else if( tree_available ) {
7611 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7612 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7613 }
7614 } // otherwise: use critical section
7615
7616 #else
7617 #error "Unknown or unsupported OS"
7618 #endif
7619
7620 #else
7621 #error "Unknown or unsupported architecture"
7622 #endif
7623
7624 }
7625
Jim Cownie5e8470a2013-09-27 10:38:44 +00007626 // KMP_FORCE_REDUCTION
7627
Andrey Churbanovec23a952015-08-17 10:12:12 +00007628 // If the team is serialized (team_size == 1), ignore the forced reduction
7629 // method and stay with the unsynchronized method (empty_reduce_block)
7630 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007631
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007632 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007633
7634 int atomic_available, tree_available;
7635
7636 switch( ( forced_retval = __kmp_force_reduction_method ) )
7637 {
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007638 case critical_reduce_block:
Jim Cownie5e8470a2013-09-27 10:38:44 +00007639 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007640 break;
7641
7642 case atomic_reduce_block:
7643 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007644 if( ! atomic_available ) {
7645 KMP_WARNING(RedMethodNotSupported, "atomic");
7646 forced_retval = critical_reduce_block;
7647 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007648 break;
7649
7650 case tree_reduce_block:
7651 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007652 if( ! tree_available ) {
7653 KMP_WARNING(RedMethodNotSupported, "tree");
7654 forced_retval = critical_reduce_block;
7655 } else {
7656 #if KMP_FAST_REDUCTION_BARRIER
7657 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7658 #endif
7659 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007660 break;
7661
7662 default:
7663 KMP_ASSERT( 0 ); // "unsupported method specified"
7664 }
7665
7666 retval = forced_retval;
7667 }
7668
7669 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7670
7671 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7672 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7673
7674 return ( retval );
7675}
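
/*
 * Illustrative sketch (standalone, not compiled into the runtime): the decision order
 * implemented above for the x86-64 Linux-like branch, simplified to a pure function.
 * The enum merely mirrors the method names; teamsize_cutoff is 4 (8 on MIC) above,
 * and the KMP_FORCE_REDUCTION override is left out.
 */
#if 0
enum example_red_method {
    example_critical, example_empty, example_atomic, example_tree
};

static enum example_red_method
example_pick_reduction_method( int team_size, int atomic_available,
                               int tree_available, int teamsize_cutoff )
{
    if ( team_size == 1 )
        return example_empty;                           /* serialized team: no sync needed */
    if ( tree_available ) {
        if ( team_size <= teamsize_cutoff )
            return atomic_available ? example_atomic
                                    : example_critical; /* small team */
        return example_tree;                            /* large team: tree + reduction barrier */
    }
    if ( atomic_available )
        return example_atomic;
    return example_critical;                            /* default fallback */
}
#endif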
7676
7677// this function is for testing set/get/determine reduce method
7678kmp_int32
7679__kmp_get_reduce_method( void ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007680 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007681}
7682
7683/* ------------------------------------------------------------------------ */