/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_affinity.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
    "5.0 (201611)";
#elif OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread.                */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid  */

int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t   **other_threads;
    size_t         stack_data;
    char          *stack_addr;
    size_t         stack_size;
    char          *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
             parallel region, made it return KMP_GTID_DNE to force serial_initialize by
             caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
             __kmp_init_gtid for this to work. */

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    /*
        ATT: The code below is a source of potential bugs due to unsynchronized access to
        __kmp_threads array. For example:
        1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
        2. Current thread is suspended by OS.
        3. Another thread unregisters and finishes (debug versions of free() may fill memory
           with something like 0xEF).
        4. Current thread is resumed.
        5. Current thread reads junk from *thr.
        TODO: Fix it.
        --ln
    */

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size =  (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base =  (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated */
                /* stack size is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

    /* if we haven't been assigned a gtid, then return code */
    if( i<0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
          other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}

int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
        /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
    }

    KMP_DEBUG_ASSERT( gtid >=0 );

    return gtid;
}
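
/* Illustrative note (comment only, not part of the build): callers that may run
   before the library is fully initialized -- e.g. a foreign thread entering the
   runtime for the first time -- would typically use the registering variant:

       int gtid = __kmp_get_global_thread_id_reg(); // registers a new root if needed
       kmp_info_t *thr = __kmp_threads[ gtid ];

   whereas hot paths known to run after registration can rely on
   __kmp_get_global_thread_id() or, when KMP_TDATA_GTID is available, on the
   __kmp_gtid thread-local directly. This is a sketch of intended usage, not a
   required calling convention. */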

/* caller must hold forkjoin_lock */
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                            (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                            "th_%d stack (overlapped)",
                            __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}


/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */

void
__kmp_infinite_loop( void )
{
    static int done = FALSE;

    while (! done) {
        KMP_YIELD( 1 );
    }
}

#define MAX_MESSAGE 512

void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];
    va_list ap;

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    int node;
    if(gtid >= 0) {
        if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
            if( __kmp_storage_map_verbose ) {
                node = __kmp_get_host_node(p1);
                if(node < 0)  /* doesn't work, so don't try this next time */
                    __kmp_storage_map_verbose = FALSE;
                else {
                    char *last;
                    int lastNode;
                    int localProc = __kmp_get_cpu_from_gtid(gtid);

                    const int page_size = KMP_GET_PAGE_SIZE();

                    p1 = (void *)( (size_t)p1 & ~((size_t)page_size - 1) );
                    p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)page_size - 1) );
                    if(localProc >= 0)
                        __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid, localProc>>1);
                    else
                        __kmp_printf_no_lock("  GTID %d\n", gtid);
# if KMP_USE_PRCTL
/* The more elaborate format is disabled for now because of the prctl hanging bug. */
                    do {
                        last = p1;
                        lastNode = node;
                        /* This loop collates adjacent pages with the same host node. */
                        do {
                            (char*)p1 += page_size;
                        } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
                                             (char*)p1 - 1, lastNode);
                    } while(p1 <= p2);
# else
                    __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                                         (char*)p1 + (page_size - 1), __kmp_get_host_node(p1));
                    if(p1 < p2)  {
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                             (char*)p2 + (page_size - 1), __kmp_get_host_node(p2));
                    }
# endif
                }
            }
        } else
            __kmp_printf_no_lock("  %s\n", KMP_I18N_STR( StorageMapWarning ) );
    }
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}

void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}

void
__kmp_abort_process()
{

    // Later threads may stall here, but that's ok because abort() will kill them.
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }; // if

    if ( KMP_OS_WINDOWS ) {
        // Let other threads know of abnormal termination and prevent deadlock
        // if abort happened during library initialization or shutdown
        __kmp_global.g.g_abort = SIGABRT;

        /*
            On Windows* OS, abort() by default causes a pop-up error box, which stalls nightly testing.
            Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
            works well, but this function is not available in VS7 (this is not a problem for the DLL,
            but it is a problem for the static OpenMP RTL). SetErrorMode (and so, the timelimit
            utility) does not help, at least in some versions of MS C RTL.

            It seems the following sequence is the only way to simulate abort() and avoid the
            pop-up error box.
        */
        raise( SIGABRT );
        _exit( 3 ); // Just in case, if signal ignored, exit anyway.
    } else {
        abort();
    }; // if

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

} // __kmp_abort_process

void
__kmp_abort_thread( void )
{
    // TODO: Eliminate g_abort global variable and this function.
    // In case of abort just call abort(), it will kill all the threads.
    __kmp_infinite_loop();
} // __kmp_abort_thread

/* ------------------------------------------------------------------------ */

/*
 * Print out the storage map for the major kmp_info_t thread data structures
 * that are allocated together.
 */

static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

    #if KMP_FAST_REDUCTION_BARRIER
        __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                      &thr->th.th_bar[bs_reduction_barrier+1],
                                      sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
    #endif // KMP_FAST_REDUCTION_BARRIER
}

/*
 * Print out the storage map for the major kmp_team_t team data structures
 * that are allocated together.
 */

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

    #if KMP_FAST_REDUCTION_BARRIER
        __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                      sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
    #endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    // TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
    // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
    // However, some threads may in fact still be alive here, although they are about to be terminated.
    // The threads in the array with ds_thread==0 are the most suspicious.
    // In fact, it may not be safe to access the __kmp_threads[] array at all.

    // TODO: does it make sense to check __kmp_roots[] ?

    // Let's check that there are no other alive threads registered with the OMP lib.
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Assume that I'm alone.

    // Now it is probably safe to check and reset the locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
    #ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
    #endif // KMP_DEBUG
}

BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
    //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));

            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
            {
                // lpReserved is used for telling the difference:
                //   lpReserved == NULL when FreeLibrary() was called,
                //   lpReserved != NULL when the process terminates.
                // When FreeLibrary() is called, worker threads remain alive.
                // So they will release the forkjoin lock by themselves.
                // When the process terminates, worker threads disappear, triggering
                // the problem of an unreleased forkjoin lock as described below.

                // A worker thread can take the forkjoin lock.
                // The problem comes up if that worker thread becomes dead
                // before it releases the forkjoin lock.
                // The forkjoin lock remains taken, while the thread
                // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
                // will try to take the forkjoin lock and will always fail,
                // so that the application will never finish [normally].
                // This scenario is possible if __kmpc_end() has not been executed.
                // It looks like this is not a corner case; common scenarios include:
                // - the main function was compiled by an alternative compiler;
                // - the main function was compiled by icl but without /Qopenmp (application with plugins);
                // - the application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP;
                // - an alive foreign thread prevented __kmpc_end from doing cleanup.

                // This is a hack to work around the problem.
                // TODO: !!! figure out something better.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));

            /* if we wanted to register new siblings all the time here call
             * __kmp_get_gtid(); */
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */


/* ------------------------------------------------------------------------ */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  // check whether KMP_LIBRARY=throughput (even init count)

    if (status) {
        __kmp_yield_init |= 1;  // throughput => turnaround (odd init count)
    }
    else {
        __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
    }

    return old_status;  // return previous setting of whether KMP_LIBRARY=throughput
}
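
/* Illustrative sketch (comment only, not part of the library): given the bit
   encoding documented above, a caller that temporarily changes the library mode
   could save and restore the previous value, e.g.

       int old = __kmp_change_library( 1 );   // switch behavior for a region
       ...
       __kmp_change_library( old );           // restore the prior setting

   Whether internal call sites actually follow this save/restore pattern is an
   assumption of this note, not something asserted by the code. */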

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* __kmp_parallel_deo --
 * Wait until it's our turn.
 */
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_MB();
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
        KMP_MB();
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo --
 * Signal the next task.
 */

void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid =  __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        KMP_MB();  /* Flush all pending memory write invalidates.  */

        /* use the tid of the next thread in this team */
        /* TODO: replace with general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB();  /* Flush all pending memory write invalidates.  */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* The BARRIER for a SINGLE process section is always explicit   */

int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the
           single block
        */
        /* TODO: Should this be acquire or release? */
        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);
        }
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        {   // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}


/*
 * determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller.
 */
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
   int master_tid, int set_nthreads
#if OMP_40_ENABLED
  , int enter_teams
#endif /* OMP_40_ENABLED */
)
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    //
    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    //
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
          : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
        else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                  master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                  master_tid, new_nthreads ));
            }
        }
    }
    else {
        KMP_ASSERT( 0 );
    }

    //
    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    //
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        //
        // If dyn-var is false, emit a 1-time warning.
        //
        if ( ! get__dynamic_2( parent_team, master_tid )
          && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
          master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    //
    // Check if the threads array is large enough, or needs expanding.
    //
    // See comment in __kmp_register_root() about the adjustment if
    // __kmp_threads[0] == NULL.
    //
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > capacity ) {
        //
        // Expand the threads array.
        //
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            //
            // The threads array was not expanded enough.
            //
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            //
            // If dyn-var is false, emit a 1-time warning.
            //
            if ( ! get__dynamic_2( parent_team, master_tid )
              && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
                else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}
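
/* Summary (descriptive comment added for readability): __kmp_reserve_threads()
   trims the request in three stages -- first the dynamic adjustment selected by
   __kmp_global.g.g_dynamic_mode (load balance, thread limit, or random), then the
   KMP_ALL_THREADS / KMP_MAX_THREADS / OMP_THREAD_LIMIT cap, and finally the
   capacity of the __kmp_threads[] array (expanding it via __kmp_expand_threads()
   when possible). A return value of 1 tells the caller to serialize. */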

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within critical section forkjoin */

static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int i;
    int use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
    KMP_MB();

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1; // index in array of hot teams
        if( master_th->th.th_teams_microtask ) {    // are we inside the teams?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level; // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level; // level was not increased in teams construct for team_of_workers before the parallel
            }            // team->t.t_level will be increased inside parallel
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1; // the team is ready to use
            } else {
                use_hot_team = 0; // AC: threads are not allocated yet
                hot_teams[level].hot_team = team; // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        } else {
            use_hot_team = 0;
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ;  i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level = master_th->th.th_teams_level;
            thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
            { // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived        = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }; // for b
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif

    }

    KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
//
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team structure,
// so we don't make changes unless they are needed.
//
inline static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        // There is no point looking at t_fp_control_saved here.
        // If it is TRUE, we still have to update the values if they are different from those we now have.
        // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
        // that the values in the team are the same as those we have.
        // So, this code achieves what we need whether or not t_fp_control_saved is true.
        // By checking whether the value needs updating we avoid unnecessary writes that would put the
        // cache-line into a written state, causing all threads in the team to have to read it again.
        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
        // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
        // So we must ensure it is correct.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    }
    else {
        // Similarly here. Don't write to this cache-line in the team structure unless we have to.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
    }
}

// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed in the team
        // during the parallel region that we are exiting.
        //
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
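
/* Descriptive note (added comment, not new behavior): propagateFPControl() is
   intended to run on the fork path so that the master's x87/MXCSR settings are
   published into the team structure, while updateHWFPControl() runs on the way
   out of a parallel region to restore the hardware registers from the saved
   team values. On non-x86 targets both collapse to no-ops via the macros above. */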

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration

/*
 * Run a parallel region that has been serialized, so runs only in a team of the single master thread.
 */
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    /* Skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        //
        // No proc_bind clause was specified, so use the current value
        // of proc-bind-var for this parallel region.
        //
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    //
    // Reset for next parallel region
    //
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used
             * TODO: increase performance by making these locks more specific */
1239 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001240
1241 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1242
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001243#if OMPT_SUPPORT
1244 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1245#endif
1246
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001247 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001248#if OMPT_SUPPORT
1249 ompt_parallel_id,
1250#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001251#if OMP_40_ENABLED
1252 proc_bind,
1253#endif
1254 & this_thr->th.th_current_task->td_icvs,
1255 0 USE_NESTED_HOT_ARG(NULL) );
1256 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1257 KMP_ASSERT( new_team );
1258
1259 /* setup new serialized team and install it */
1260 new_team->t.t_threads[0] = this_thr;
1261 new_team->t.t_parent = this_thr->th.th_team;
1262 serial_team = new_team;
1263 this_thr->th.th_serial_team = serial_team;
1264
1265 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1266 global_tid, serial_team ) );
1267
1268
1269 /* TODO the above breaks the requirement that if we run out of
1270 * resources, then we can still guarantee that serialized teams
1271 * are ok, since we may need to allocate a new one */
1272 } else {
1273 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1274 global_tid, serial_team ) );
1275 }
1276
1277 /* we have to initialize this serial team */
1278 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1279 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1280 KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
1281 serial_team->t.t_ident = loc;
1282 serial_team->t.t_serialized = 1;
1283 serial_team->t.t_nproc = 1;
1284 serial_team->t.t_parent = this_thr->th.th_team;
1285 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
1286 this_thr->th.th_team = serial_team;
1287 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1288
1289 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n",
1290 global_tid, this_thr->th.th_current_task ) );
1291 KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
1292 this_thr->th.th_current_task->td_flags.executing = 0;
1293
1294 __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
1295
1296 /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
1297 each serialized task represented by team->t.t_serialized? */
1298 copy_icvs(
1299 & this_thr->th.th_current_task->td_icvs,
1300 & this_thr->th.th_current_task->td_parent->td_icvs );
1301
1302 // Thread value exists in the nested nthreads array for the next nested level
1303 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1304 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1305 }
1306
1307#if OMP_40_ENABLED
1308 if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
1309 this_thr->th.th_current_task->td_icvs.proc_bind
1310 = __kmp_nested_proc_bind.bind_types[ level + 1 ];
1311 }
1312#endif /* OMP_40_ENABLED */
1313
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001314#if USE_DEBUGGER
1315 serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
1316#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001317 this_thr->th.th_info.ds.ds_tid = 0;
1318
1319 /* set thread cache values */
1320 this_thr->th.th_team_nproc = 1;
1321 this_thr->th.th_team_master = this_thr;
1322 this_thr->th.th_team_serialized = 1;
1323
1324 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1325 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1326
1327 propagateFPControl (serial_team);
1328
1329 /* check if we need to allocate dispatch buffers stack */
1330 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1331 if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
1332 serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
1333 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1334 }
1335 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1336
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001337#if OMPT_SUPPORT
1338 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1339 __ompt_team_assign_id(serial_team, ompt_parallel_id);
1340#endif
1341
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001342 KMP_MB();
1343
1344 } else {
1345 /* this serialized team is already being used,
1346 * that's fine, just add another nested level */
1347 KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
1348 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1349 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1350 ++ serial_team->t.t_serialized;
1351 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1352
1353 // Nested level will be an index in the nested nthreads array
1354 int level = this_thr->th.th_team->t.t_level;
1355 // Thread value exists in the nested nthreads array for the next nested level
1356 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1357 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1358 }
1359 serial_team->t.t_level++;
1360 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
1361 global_tid, serial_team, serial_team->t.t_level ) );
1362
1363 /* allocate/push dispatch buffers stack */
1364 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1365 {
1366 dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
1367 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1368 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1369 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1370 }
1371 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1372
1373 KMP_MB();
1374 }
1375
1376 if ( __kmp_env_consistency_check )
1377 __kmp_push_parallel( global_tid, NULL );
1378
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001379}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001380
Jim Cownie5e8470a2013-09-27 10:38:44 +00001381/* most of the work for a fork */
1382/* return true if we really went parallel, false if serialized */
1383int
1384__kmp_fork_call(
1385 ident_t * loc,
1386 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001387 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001388 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001389#if OMPT_SUPPORT
1390 void *unwrapped_task,
1391#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001392 microtask_t microtask,
1393 launch_t invoker,
1394/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001395#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001396 va_list * ap
1397#else
1398 va_list ap
1399#endif
1400 )
1401{
1402 void **argv;
1403 int i;
1404 int master_tid;
1405 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001406 kmp_team_t *team;
1407 kmp_team_t *parent_team;
1408 kmp_info_t *master_th;
1409 kmp_root_t *root;
1410 int nthreads;
1411 int master_active;
1412 int master_set_numthreads;
1413 int level;
1414#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001415 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001416 int teams_level;
1417#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001418#if KMP_NESTED_HOT_TEAMS
1419 kmp_hot_team_ptr_t **p_hot_teams;
1420#endif
1421 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001422 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001423 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001424
1425 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001426 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
1427 /* Some systems prefer the stack for the root thread(s) to start with */
1428 /* some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001429 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001430 /* These 2 lines below are so this does not get optimized out */
1431 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1432 __kmp_stkpadding += (short)((kmp_int64)dummy);
1433 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001434
1435 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001436 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001437 if( ! TCR_4(__kmp_init_parallel) )
1438 __kmp_parallel_initialize();
1439
1440 /* setup current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001441 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1442 parent_team = master_th->th.th_team;
1443 master_tid = master_th->th.th_info.ds.ds_tid;
1444 master_this_cons = master_th->th.th_local.this_construct;
1445 root = master_th->th.th_root;
1446 master_active = root->r.r_active;
1447 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001448
1449#if OMPT_SUPPORT
1450 ompt_parallel_id_t ompt_parallel_id;
1451 ompt_task_id_t ompt_task_id;
1452 ompt_frame_t *ompt_frame;
1453 ompt_task_id_t my_task_id;
1454 ompt_parallel_id_t my_parallel_id;
1455
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001456 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001457 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1458 ompt_task_id = __ompt_get_task_id_internal(0);
1459 ompt_frame = __ompt_get_task_frame_internal(0);
1460 }
1461#endif
1462
Jim Cownie5e8470a2013-09-27 10:38:44 +00001463 // Nested level will be an index in the nested nthreads array
1464 level = parent_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001465 active_level = parent_team->t.t_active_level; // used to launch non-serial teams even if nesting is not allowed
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001466#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001467 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1468#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001469#if KMP_NESTED_HOT_TEAMS
1470 p_hot_teams = &master_th->th.th_hot_teams;
1471 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1472 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1473 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1474 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1475 (*p_hot_teams)[0].hot_team_nth = 1; // either the actual thread count, or not needed (when active_level > 0)
1476 }
1477#endif
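 // Hot teams (KMP_NESTED_HOT_TEAMS): th_hot_teams is a lazily allocated array,
 // indexed by nesting level up to __kmp_hot_teams_max_level, that caches the team
 // (and its worker threads) used at each level so nested parallels can reuse them
 // instead of re-allocating threads every time.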
Jim Cownie5e8470a2013-09-27 10:38:44 +00001478
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001479#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001480 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001481 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1482 int team_size = master_set_numthreads;
1483
1484 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1485 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001486 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001487 }
1488#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001489
Jim Cownie5e8470a2013-09-27 10:38:44 +00001490 master_th->th.th_ident = loc;
1491
1492#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001493 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001494 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
1495 // AC: This is the start of a parallel that is nested inside a teams construct.
1496 // The team is the actual (hot) team; all workers are ready at the fork barrier.
1497 // No lock is needed to initialize the team a bit, then release the workers.
1498 parent_team->t.t_ident = loc;
Jonathan Peyton7cf08d42016-06-16 18:47:38 +00001499 __kmp_alloc_argv_entries( argc, parent_team, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001500 parent_team->t.t_argc = argc;
1501 argv = (void**)parent_team->t.t_argv;
1502 for( i=argc-1; i >= 0; --i )
1503/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001504#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001505 *argv++ = va_arg( *ap, void * );
1506#else
1507 *argv++ = va_arg( ap, void * );
1508#endif
1509 /* Increment our nested depth levels, but do not increase the serialization */
1510 if ( parent_team == master_th->th.th_serial_team ) {
1511 // AC: we are in serialized parallel
1512 __kmpc_serialized_parallel(loc, gtid);
1513 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
1514 parent_team->t.t_serialized--; // AC: need this so that the enquiry functions
1515 // work correctly; will be restored at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001516
1517#if OMPT_SUPPORT
1518 void *dummy;
1519 void **exit_runtime_p;
1520
1521 ompt_lw_taskteam_t lw_taskteam;
1522
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001523 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001524 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1525 unwrapped_task, ompt_parallel_id);
1526 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1527 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1528
1529 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1530
1531#if OMPT_TRACE
1532 /* OMPT implicit task begin */
1533 my_task_id = lw_taskteam.ompt_task_info.task_id;
1534 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001535 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001536 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1537 my_parallel_id, my_task_id);
1538 }
1539#endif
1540
1541 /* OMPT state */
1542 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1543 } else {
1544 exit_runtime_p = &dummy;
1545 }
1546#endif
1547
Jonathan Peyton45be4502015-08-11 21:36:41 +00001548 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001549 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1550 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001551 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001552#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001553 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001554#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001555 );
1556 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001557
1558#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001559 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001560 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001561#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001562 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001563
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001564 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001565 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1566 ompt_parallel_id, ompt_task_id);
1567 }
1568
1569 __ompt_lw_taskteam_unlink(master_th);
1570 // clear the task id only after unlinking the task
1571 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1572#endif
1573
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001574 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001575 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001576 ompt_parallel_id, ompt_task_id,
1577 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001578 }
1579 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1580 }
1581#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001582 return TRUE;
1583 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001584
Jim Cownie5e8470a2013-09-27 10:38:44 +00001585 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001586#if OMPT_SUPPORT
1587 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1588#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001589 parent_team->t.t_invoke = invoker;
1590 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1591 parent_team->t.t_active_level ++;
1592 parent_team->t.t_level ++;
1593
1594 /* Change number of threads in the team if requested */
1595 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001596 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001597 // AC: can only reduce the number of threads dynamically, cannot increase
1598 kmp_info_t **other_threads = parent_team->t.t_threads;
1599 parent_team->t.t_nproc = master_set_numthreads;
1600 for ( i = 0; i < master_set_numthreads; ++i ) {
1601 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1602 }
1603 // Keep the extra threads hot in the team for possible subsequent parallel regions
1604 }
1605 master_th->th.th_set_nproc = 0;
1606 }
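 // For illustration: inside a teams construct with th_teams_size.nth == 8, a
 // "#pragma omp parallel num_threads(4)" takes this path, shrinking t_nproc to 4
 // while threads 4..7 simply stay parked in the hot team for the next parallel.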
1607
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001608#if USE_DEBUGGER
1609 if ( __kmp_debugging ) { // Let debugger override number of threads.
1610 int nth = __kmp_omp_num_threads( loc );
1611 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1612 master_set_numthreads = nth;
1613 }; // if
1614 }; // if
1615#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001616
1617 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1618 __kmp_internal_fork( loc, gtid, parent_team );
1619 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1620
1621 /* Invoke microtask for MASTER thread */
1622 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1623 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1624
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001625 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001626 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1627 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001628 if (! parent_team->t.t_invoke( gtid )) {
1629 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1630 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001631 }
1632 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1633 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1634 KMP_MB(); /* Flush all pending memory write invalidates. */
1635
1636 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1637
1638 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001639 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001640#endif /* OMP_40_ENABLED */
1641
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001642#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001644 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001646#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001647
Jim Cownie5e8470a2013-09-27 10:38:44 +00001648 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
1649 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001650 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001651#if OMP_40_ENABLED
1652 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1653#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001654 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001655 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001656
1657 // Check whether we need to take the forkjoin lock (no need for a serialized parallel outside of a teams construct).
1658 // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
1659 if (nthreads > 1) {
1660 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1661#if OMP_40_ENABLED
1662 && !enter_teams
1663#endif /* OMP_40_ENABLED */
1664 ) ) || ( __kmp_library == library_serial ) ) {
1665 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1666 gtid, nthreads ));
1667 nthreads = 1;
1668 }
1669 }
1670 if ( nthreads > 1 ) {
1671 /* determine how many new threads we can use */
1672 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1673
1674 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001675#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001676/* AC: If we execute teams from a parallel region (on the host), then the teams should be created,
1677 but each can only have 1 thread if nesting is disabled. If teams is called from a serial region,
1678 then the teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001679 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001680#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001681 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001682 if ( nthreads == 1 ) {
1683 // Free lock for single thread execution here;
1684 // for multi-thread execution it will be freed later
1685 // after team of threads created and initialized
1686 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1687 }
1688 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001689 }
1690 KMP_DEBUG_ASSERT( nthreads > 0 );
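 // Summary of the decision above: the region is serialized (nthreads == 1) when the
 // max-active-levels ICV is exhausted, when nesting is disabled and we are already
 // inside an active parallel (and not entering a teams construct), when
 // KMP_LIBRARY=serial, or when __kmp_reserve_threads() could not grant more than one
 // thread; only in the multi-threaded case does the forkjoin lock remain held here.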
1691
1692 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001693 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001694
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 /* create a serialized parallel region? */
1696 if ( nthreads == 1 ) {
1697 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001698#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001699 void * args[ argc ];
1700#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001701 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001702#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001703
Jim Cownie5e8470a2013-09-27 10:38:44 +00001704 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1705
1706 __kmpc_serialized_parallel(loc, gtid);
1707
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001708 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001710 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711#if OMP_40_ENABLED
1712 if ( !ap ) {
1713 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001714 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001715 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001716
1717#if OMPT_SUPPORT
1718 void *dummy;
1719 void **exit_runtime_p;
1720
1721 ompt_lw_taskteam_t lw_taskteam;
1722
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001723 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001724 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1725 unwrapped_task, ompt_parallel_id);
1726 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1727 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1728
1729 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1730
1731#if OMPT_TRACE
1732 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001733 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001734 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1735 ompt_parallel_id, my_task_id);
1736 }
1737#endif
1738
1739 /* OMPT state */
1740 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1741 } else {
1742 exit_runtime_p = &dummy;
1743 }
1744#endif
1745
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001746 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001747 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1748 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001749 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1750#if OMPT_SUPPORT
1751 , exit_runtime_p
1752#endif
1753 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001754 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001755
1756#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001757 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001758 if (ompt_enabled) {
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001759 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001760
1761#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001762 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001763 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1764 ompt_parallel_id, ompt_task_id);
1765 }
1766#endif
1767
1768 __ompt_lw_taskteam_unlink(master_th);
1769 // clear the task id only after unlinking the task
1770 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1771
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001772 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001773 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001774 ompt_parallel_id, ompt_task_id,
1775 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001776 }
1777 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1778 }
1779#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001780 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
1781 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1782 team = master_th->th.th_team;
1783 //team->t.t_pkfn = microtask;
1784 team->t.t_invoke = invoker;
1785 __kmp_alloc_argv_entries( argc, team, TRUE );
1786 team->t.t_argc = argc;
1787 argv = (void**) team->t.t_argv;
1788 if ( ap ) {
1789 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001790// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001791# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001792 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001793# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001794 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001795# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001796 } else {
1797 for( i=0; i < argc; ++i )
1798 // Get args from parent team for teams construct
1799 argv[i] = parent_team->t.t_argv[i];
1800 }
1801 // AC: revert change made in __kmpc_serialized_parallel()
1802 // because initial code in teams should have level=0
1803 team->t.t_level--;
1804 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001805 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001806 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1807 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001808 invoker(gtid);
1809 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810 } else {
1811#endif /* OMP_40_ENABLED */
1812 argv = args;
1813 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001814// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001815#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001816 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001817#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001818 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001819#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001820 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001821
1822#if OMPT_SUPPORT
1823 void *dummy;
1824 void **exit_runtime_p;
1825
1826 ompt_lw_taskteam_t lw_taskteam;
1827
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001828 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001829 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1830 unwrapped_task, ompt_parallel_id);
1831 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1832 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1833
1834 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1835
1836#if OMPT_TRACE
1837 /* OMPT implicit task begin */
1838 my_task_id = lw_taskteam.ompt_task_info.task_id;
1839 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001840 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001841 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1842 my_parallel_id, my_task_id);
1843 }
1844#endif
1845
1846 /* OMPT state */
1847 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1848 } else {
1849 exit_runtime_p = &dummy;
1850 }
1851#endif
1852
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001853 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001854 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1855 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001856 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1857#if OMPT_SUPPORT
1858 , exit_runtime_p
1859#endif
1860 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001861 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001862
1863#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001864 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001865 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001866#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001867 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001868
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001869 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001870 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1871 my_parallel_id, my_task_id);
1872 }
1873#endif
1874
1875 __ompt_lw_taskteam_unlink(master_th);
1876 // clear the task id only after unlinking the task
1877 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1878
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001879 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001880 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001881 ompt_parallel_id, ompt_task_id,
1882 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001883 }
1884 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1885 }
1886#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001887#if OMP_40_ENABLED
1888 }
1889#endif /* OMP_40_ENABLED */
1890 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001891 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001892#if OMPT_SUPPORT
1893 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1894 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1895 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1896 unwrapped_task, ompt_parallel_id);
1897
1898 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001899 lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001900 __ompt_lw_taskteam_link(lwt, master_th);
1901#endif
1902
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001903 // we were called from GNU native code
1904 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1905 return FALSE;
1906 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001908 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001909 }
1910
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001912 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001913 KMP_MB();
1914 return FALSE;
1915 }
1916
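 // From here on the region really forks: nthreads > 1, and __kmp_forkjoin_lock is
 // still held (acquired just before __kmp_reserve_threads() above); it is released
 // further below, once the new team has been set up and its worker threads forked.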
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917 // GEH: only modify the executing flag in the case when not serialized;
1918 // the serialized case is handled in __kmpc_serialized_parallel
1919 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001920 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1921 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1923 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1924 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001925
1926#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001927 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001928#endif /* OMP_40_ENABLED */
1929 {
1930 /* Increment our nested depth level */
1931 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1932 }
1933
Jim Cownie5e8470a2013-09-27 10:38:44 +00001934 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001935 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001936 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1937 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 }
1939 else {
1940 nthreads_icv = 0; // don't update
1941 }
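 // Example (assuming the usual comma-separated form of OMP_NUM_THREADS, e.g.
 // OMP_NUM_THREADS=8,4): a master at level 0 sees __kmp_nested_nth.nth[1] == 4 here,
 // so nthreads_icv becomes 4 and the child team's nproc ICV is overridden below.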
1942
1943#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001944 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001945 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001946 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001947 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1948 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949 }
1950 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001951 if (proc_bind == proc_bind_default) {
1952 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1953 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001954 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001955 /* else: The proc_bind policy was specified explicitly on parallel clause. This
1956 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001957 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001958 if ((level+1 < __kmp_nested_proc_bind.used)
1959 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1960 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001961 }
1962 }
1963
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965 master_th->th.th_set_proc_bind = proc_bind_default;
1966#endif /* OMP_40_ENABLED */
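 // For illustration: with OMP_PROC_BIND=spread,close and no proc_bind clause, this
 // region uses "spread" (the current proc-bind-var) and the child threads inherit
 // "close" as their proc-bind-var; an explicit proc_bind clause on the parallel
 // would override the policy for this region only, without changing proc-bind-var.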
1967
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001968 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001972 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001973 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001974 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001975 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001976 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001977 new_icvs.nproc = nthreads_icv;
1978 }
1979
1980#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001981 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982 new_icvs.proc_bind = proc_bind_icv;
1983 }
1984#endif /* OMP_40_ENABLED */
1985
1986 /* allocate a new parallel team */
1987 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1988 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001989#if OMPT_SUPPORT
1990 ompt_parallel_id,
1991#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001993 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001995 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1996 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997 /* allocate a new parallel team */
1998 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1999 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002000#if OMPT_SUPPORT
2001 ompt_parallel_id,
2002#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002003#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002004 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002006 &master_th->th.th_current_task->td_icvs, argc
2007 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002009 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
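 // Only one of the two branches above builds a fresh ICV block: a copy of the
 // master's ICVs is made (and patched) only when the nproc or proc-bind value has to
 // change for the child team; otherwise the master's current ICVs are passed to
 // __kmp_allocate_team directly.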
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010
2011 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002012 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2013 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2014 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2015 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2016 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002017#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002018 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002019#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002020 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021 // TODO: parent_team->t.t_level == INT_MAX ???
2022#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002023 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002024#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002025 int new_level = parent_team->t.t_level + 1;
2026 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2027 new_level = parent_team->t.t_active_level + 1;
2028 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002029#if OMP_40_ENABLED
2030 } else {
2031 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002032 int new_level = parent_team->t.t_level;
2033 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2034 new_level = parent_team->t.t_active_level;
2035 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036 }
2037#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002038 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton6b560f02016-07-01 17:54:32 +00002039 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002040 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002041
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002042#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002043 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002044#endif
2045
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002046 // Update the floating point rounding in the team if required.
2047 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002048
2049 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002050 // Set master's task team to the team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002051#if 0
2052 // Patch out an assertion that trips while the runtime seems to operate correctly.
2053 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002054 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002055#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002057 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002058 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002059
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002060 if ( active_level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002061 // Take a memo of master's task_state
2062 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2063 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002064 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2065 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002066 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002067 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002068 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2069 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2070 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002071 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2072 new_stack[i] = 0;
2073 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002074 old_stack = master_th->th.th_task_state_memo_stack;
2075 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002076 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002077 __kmp_free(old_stack);
2078 }
2079 // Store master's task_state on stack
2080 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2081 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002082#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002083 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
Jonathan Peyton54127982015-11-04 21:37:48 +00002084 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2085 }
2086 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002087#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002088 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002089#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002090 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002091#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002092 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002093#if !KMP_NESTED_HOT_TEAMS
2094 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2095#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002096 }
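 // The memo stack above mirrors the restore in __kmp_join_call below: the master's
 // th_task_state is saved here (the stack doubles in size when full) and popped at
 // join, so the state in effect before the inner region can be re-established.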
Jim Cownie5e8470a2013-09-27 10:38:44 +00002097
2098 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2099 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2100 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2101 ( team->t.t_master_tid == 0 &&
2102 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2103 KMP_MB();
2104
2105 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002106 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002107#if OMP_40_ENABLED
2108 if ( ap ) {
2109#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002110 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002111// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002112#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002113 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002115 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002116#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002117 KMP_CHECK_UPDATE(*argv, new_argv);
2118 argv++;
2119 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120#if OMP_40_ENABLED
2121 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002122 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002123 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002124 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2125 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002126 }
2127#endif /* OMP_40_ENABLED */
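 // The KMP_CHECK_UPDATE* macros used above only perform the store when the value
 // actually changes, which avoids needlessly dirtying team cache lines that worker
 // threads of a reused (hot) team may be reading (the same idea as the r_active
 // check below).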
2128
2129 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002130 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002131 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2132 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002133
2134 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002135 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002136
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002137#if OMPT_SUPPORT
2138 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2139#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002140
2141 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2142
Jim Cownie5e8470a2013-09-27 10:38:44 +00002143#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002144 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002145# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002146 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002147# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002148 ) {
2149#if USE_ITT_NOTIFY
2150 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2151 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002152 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002153 kmp_uint64 tmp_time = 0;
2154 if ( __itt_get_timestamp_ptr )
2155 tmp_time = __itt_get_timestamp();
2156 // Internal fork - report frame begin
2157 master_th->th.th_frame_time = tmp_time;
2158 if ( __kmp_forkjoin_frames_mode == 3 )
2159 team->t.t_region_time = tmp_time;
2160 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2161#endif /* USE_ITT_NOTIFY */
2162 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2163 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2164 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002165 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2166 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002167 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002168#endif /* USE_ITT_BUILD */
2169
2170 /* now go on and do the work */
2171 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2172 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002173 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2174 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002175
2176#if USE_ITT_BUILD
2177 if ( __itt_stack_caller_create_ptr ) {
2178 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2179 }
2180#endif /* USE_ITT_BUILD */
2181
2182#if OMP_40_ENABLED
2183 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
2184#endif /* OMP_40_ENABLED */
2185 {
2186 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002187 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2188 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002189 }
2190
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002191 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002192 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2193 return TRUE;
2194 }
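 // The GNU-compatible entry points invoke the outlined microtask on the master
 // thread themselves, so we return here without calling team->t.t_invoke(); the
 // other call contexts fall through to the invocation below.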
2195
2196 /* Invoke microtask for MASTER thread */
2197 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2198 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002199 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002200
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002201 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002202 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2203 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002204 if (! team->t.t_invoke( gtid )) {
2205 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2206 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002207 }
2208 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2209 gtid, team->t.t_id, team->t.t_pkfn ) );
2210 KMP_MB(); /* Flush all pending memory write invalidates. */
2211
2212 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2213
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002214#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002215 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002216 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2217 }
2218#endif
2219
Jim Cownie5e8470a2013-09-27 10:38:44 +00002220 return TRUE;
2221}
2222
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002223#if OMPT_SUPPORT
2224static inline void
2225__kmp_join_restore_state(
2226 kmp_info_t *thread,
2227 kmp_team_t *team)
2228{
2229 // restore state outside the region
2230 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2231 ompt_state_work_serial : ompt_state_work_parallel);
2232}
2233
2234static inline void
2235__kmp_join_ompt(
2236 kmp_info_t *thread,
2237 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002238 ompt_parallel_id_t parallel_id,
2239 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002240{
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002241 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002242 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002243 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002244 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002245 }
2246
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002247 task_info->frame.reenter_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002248 __kmp_join_restore_state(thread,team);
2249}
2250#endif
2251
Jim Cownie5e8470a2013-09-27 10:38:44 +00002252void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002253__kmp_join_call(ident_t *loc, int gtid
2254#if OMPT_SUPPORT
2255 , enum fork_context_e fork_context
2256#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257#if OMP_40_ENABLED
2258 , int exit_teams
2259#endif /* OMP_40_ENABLED */
2260)
2261{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00002262 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002263 kmp_team_t *team;
2264 kmp_team_t *parent_team;
2265 kmp_info_t *master_th;
2266 kmp_root_t *root;
2267 int master_active;
2268 int i;
2269
2270 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2271
2272 /* setup current data */
2273 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002274 root = master_th->th.th_root;
2275 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002276 parent_team = team->t.t_parent;
2277
2278 master_th->th.th_ident = loc;
2279
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002280#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002281 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002282 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2283 }
2284#endif
2285
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002286#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00002287 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2288 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2289 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002290 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2291 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002293#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002294
2295 if( team->t.t_serialized ) {
2296#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002297 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002298 // We are in teams construct
2299 int level = team->t.t_level;
2300 int tlevel = master_th->th.th_teams_level;
2301 if ( level == tlevel ) {
2302 // AC: we haven't incremented it earlier at start of teams construct,
2303 // so do it here - at the end of teams construct
2304 team->t.t_level++;
2305 } else if ( level == tlevel + 1 ) {
2306 // AC: we are exiting parallel inside teams, need to increment serialization
2307 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2308 team->t.t_serialized++;
2309 }
2310 }
2311#endif /* OMP_40_ENABLED */
2312 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002313
2314#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002315 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002316 __kmp_join_restore_state(master_th, parent_team);
2317 }
2318#endif
2319
Jim Cownie5e8470a2013-09-27 10:38:44 +00002320 return;
2321 }
2322
2323 master_active = team->t.t_master_active;
2324
2325#if OMP_40_ENABLED
2326 if (!exit_teams)
2327#endif /* OMP_40_ENABLED */
2328 {
2329 // AC: No barrier for internal teams at exit from teams construct.
2330 // But there is barrier for external team (league).
2331 __kmp_internal_join( loc, gtid, team );
2332 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002333#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002334 else {
2335 master_th->th.th_task_state = 0; // AC: no tasking in teams (outside of any parallel region)
2336 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002337#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002338
Jim Cownie5e8470a2013-09-27 10:38:44 +00002339 KMP_MB();
2340
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002341#if OMPT_SUPPORT
2342 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2343#endif
2344
Jim Cownie5e8470a2013-09-27 10:38:44 +00002345#if USE_ITT_BUILD
2346 if ( __itt_stack_caller_create_ptr ) {
2347 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2348 }
2349
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002350 // Mark end of "parallel" region for VTune.
2351 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002353 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002355 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002356 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002357 // only one notification scheme (either "submit" or "forking/joined", not both)
2358 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2359 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2360 0, loc, master_th->th.th_team_nproc, 1 );
2361 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2362 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2363 __kmp_itt_region_joined( gtid );
2364 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002365#endif /* USE_ITT_BUILD */
2366
2367#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002368 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002369 !exit_teams &&
2370 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2371 team->t.t_level == master_th->th.th_teams_level + 1 ) {
2372 // AC: We need to leave the team structure intact at the end of a
2373 // parallel inside the teams construct, so that the same (hot) team
2374 // is reused by the next parallel; only adjust the nesting levels
2375
2376 /* Decrement our nested depth level */
2377 team->t.t_level --;
2378 team->t.t_active_level --;
2379 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2380
2381 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002382 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002383 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002384 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002385 kmp_info_t **other_threads = team->t.t_threads;
2386 team->t.t_nproc = new_num;
2387 for ( i = 0; i < old_num; ++i ) {
2388 other_threads[i]->th.th_team_nproc = new_num;
2389 }
2390 // Adjust states of non-used threads of the team
2391 for ( i = old_num; i < new_num; ++i ) {
2392 // Re-initialize thread's barrier data.
2393 int b;
2394 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002395 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002396 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002397 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002398#if USE_DEBUGGER
2399 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2400#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002401 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002402 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2403 // Synchronize thread's task state
2404 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2405 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002406 }
2407 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002408
2409#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002410 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002411 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002412 }
2413#endif
2414
Jim Cownie5e8470a2013-09-27 10:38:44 +00002415 return;
2416 }
2417#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002418
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002419 /* do cleanup and restore the parent team */
2420 master_th->th.th_info .ds.ds_tid = team->t.t_master_tid;
2421 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2422
2423 master_th->th.th_dispatch =
2424 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002425
2426 /* jc: The following lock has instructions with REL and ACQ semantics,
2427 separating the parallel user code called in this parallel region
2428 from the serial user code called after this function returns.
2429 */
2430 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2431
2432#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002433 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002434#endif /* OMP_40_ENABLED */
2435 {
2436 /* Decrement our nested depth level */
2437 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2438 }
2439 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2440
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002441#if OMPT_SUPPORT && OMPT_TRACE
2442 if (ompt_enabled) {
2443 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2444 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2445 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2446 parallel_id, task_info->task_id);
2447 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00002448 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002449 task_info->task_id = 0;
2450 }
2451#endif
2452
Jim Cownie5e8470a2013-09-27 10:38:44 +00002453 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2454 0, master_th, team ) );
2455 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002456
Alp Toker98758b02014-03-02 04:12:06 +00002457#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002458 //
2459 // Restore master thread's partition.
2460 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002461 master_th->th.th_first_place = team->t.t_first_place;
2462 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002463#endif /* OMP_40_ENABLED */
2464
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002465 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002466
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002467 if ( root->r.r_active != master_active )
2468 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002469
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002470 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002471
2472 /* this race was fun to find. make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002473 * region otherwise assertions may fail occasionally since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002474 * may be reallocated and the hierarchy appears inconsistent. it is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002475 * actually safe to run and won't cause any bugs, but will cause those
Jim Cownie5e8470a2013-09-27 10:38:44 +00002476 * assertion failures. it's only one deref&assign so might as well put this
2477 * in the critical region */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002478 master_th->th.th_team = parent_team;
2479 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2480 master_th->th.th_team_master = parent_team->t.t_threads[0];
2481 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002482
2483 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002484 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002485 parent_team != master_th->th.th_serial_team &&
2486 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002487 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2488 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002489 }
2490
Jim Cownie5e8470a2013-09-27 10:38:44 +00002491 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002492 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2493 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2494 // Remember master's state if we re-use this nested hot team
2495 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002496 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002497 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002498 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002499 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002500 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002501 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002502 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002503 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002504 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002505
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002506 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2507 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2508 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002509
2510 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2511
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002512#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002513 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002514 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002515 }
2516#endif
2517
Jim Cownie5e8470a2013-09-27 10:38:44 +00002518 KMP_MB();
2519 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2520}
2521
2522/* ------------------------------------------------------------------------ */
2523/* ------------------------------------------------------------------------ */
2524
2525/* Check whether we should push an internal control record onto the
2526 serial team stack. If so, do it. */
2527void
2528__kmp_save_internal_controls ( kmp_info_t * thread )
2529{
2530
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002531 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002532 return;
2533 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002534 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002535 int push = 0;
2536
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002537 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538 push = 1;
2539 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002540 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2541 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002542 push = 1;
2543 }
2544 }
2545 if (push) { /* push a record on the serial team's stack */
2546 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2547
Jim Cownie5e8470a2013-09-27 10:38:44 +00002548 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002549
2550 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2551
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002552 control->next = thread->th.th_team->t.t_control_stack_top;
2553 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002554 }
2555 }
2556}
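/* Illustrative sketch (not part of the runtime, all names hypothetical): the
   push-only-when-the-nesting-level-changes pattern used above, reduced to a
   minimal standalone stack of ICV snapshots. */
#include <stdlib.h>

typedef struct icv_record {
    int serial_nesting_level;        /* nesting level this snapshot belongs to */
    int nproc;                       /* one example ICV */
    struct icv_record *next;         /* stack link */
} icv_record_t;

/* Push a snapshot only if the top of the stack does not already describe the
   current serialized nesting level; records are popped and freed when the
   corresponding serialized region ends. */
static void icv_save(icv_record_t **top, int level, int nproc) {
    if (*top == NULL || (*top)->serial_nesting_level != level) {
        icv_record_t *rec = (icv_record_t *)malloc(sizeof(*rec));
        rec->serial_nesting_level = level;
        rec->nproc = nproc;
        rec->next = *top;
        *top = rec;
    }
}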
2557
2558/* Changes set_nproc */
2559void
2560__kmp_set_num_threads( int new_nth, int gtid )
2561{
2562 kmp_info_t *thread;
2563 kmp_root_t *root;
2564
2565 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2566 KMP_DEBUG_ASSERT( __kmp_init_serial );
2567
2568 if (new_nth < 1)
2569 new_nth = 1;
2570 else if (new_nth > __kmp_max_nth)
2571 new_nth = __kmp_max_nth;
2572
Jonathan Peyton45be4502015-08-11 21:36:41 +00002573 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002574 thread = __kmp_threads[gtid];
2575
2576 __kmp_save_internal_controls( thread );
2577
2578 set__nproc( thread, new_nth );
2579
2580 //
2581 // If this omp_set_num_threads() call will cause the hot team size to be
2582 // reduced (in the absence of a num_threads clause), then reduce it now,
2583 // rather than waiting for the next parallel region.
2584 //
2585 root = thread->th.th_root;
2586 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002587 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2588#if KMP_NESTED_HOT_TEAMS
2589 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2590#endif
2591 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592 kmp_team_t *hot_team = root->r.r_hot_team;
2593 int f;
2594
2595 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2596
Jim Cownie5e8470a2013-09-27 10:38:44 +00002597 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002598 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2599 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002600 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2601 // When decreasing team size, threads no longer in the team should unref task team.
2602 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2603 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002604 __kmp_free_thread( hot_team->t.t_threads[f] );
2605 hot_team->t.t_threads[f] = NULL;
2606 }
2607 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002608#if KMP_NESTED_HOT_TEAMS
2609 if( thread->th.th_hot_teams ) {
2610 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2611 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2612 }
2613#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002614
Jim Cownie5e8470a2013-09-27 10:38:44 +00002615 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2616
2617 //
2618 // Update the t_nproc field in the threads that are still active.
2619 //
2620 for( f=0 ; f < new_nth; f++ ) {
2621 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2622 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2623 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002624 // Special flag set when the hot team size is changed by an omp_set_num_threads() call
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002625 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002626 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002627}
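/* Usage sketch (assumes an OpenMP compiler, e.g. -fopenmp): shrinking the
   thread count between regions is what reaches the hot-team trimming path
   above through the standard omp_set_num_threads entry point. */
#include <omp.h>
#include <stdio.h>

int main(void) {
    omp_set_num_threads(4);
    #pragma omp parallel
    {
        if (omp_get_thread_num() == 0)
            printf("first region:  %d threads\n", omp_get_num_threads());
    }

    omp_set_num_threads(2);              /* smaller than the hot team built above */
    #pragma omp parallel
    {
        if (omp_get_thread_num() == 0)
            printf("second region: %d threads\n", omp_get_num_threads());
    }
    return 0;
}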
2628
Jim Cownie5e8470a2013-09-27 10:38:44 +00002629/* Changes max_active_levels */
2630void
2631__kmp_set_max_active_levels( int gtid, int max_active_levels )
2632{
2633 kmp_info_t *thread;
2634
2635 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2636 KMP_DEBUG_ASSERT( __kmp_init_serial );
2637
2638 // validate max_active_levels
2639 if( max_active_levels < 0 ) {
2640 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2641 // We ignore this call if the user has specified a negative value.
2642 // The current setting won't be changed. The last valid setting will be used.
2643 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2644 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2645 return;
2646 }
2647 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2648 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2649 // We allow a zero value. (implementation defined behavior)
2650 } else {
2651 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2652 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2653 // Current upper limit is MAX_INT. (implementation defined behavior)
2654 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
2655 // Actually, the flow should never get here until we use MAX_INT limit.
2656 }
2657 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2658
2659 thread = __kmp_threads[ gtid ];
2660
2661 __kmp_save_internal_controls( thread );
2662
2663 set__max_active_levels( thread, max_active_levels );
2664
2665}
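/* Usage sketch: the validation above is reachable through the standard
   omp_set_max_active_levels entry point; negative values are ignored and the
   previous setting is kept. */
#include <omp.h>
#include <stdio.h>

int main(void) {
    omp_set_max_active_levels(2);                /* allow two nested active levels */
    printf("max active levels: %d\n", omp_get_max_active_levels());
    omp_set_max_active_levels(-1);               /* ignored: previous value remains */
    printf("max active levels: %d\n", omp_get_max_active_levels());
    return 0;
}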
2666
2667/* Gets max_active_levels */
2668int
2669__kmp_get_max_active_levels( int gtid )
2670{
2671 kmp_info_t *thread;
2672
2673 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2674 KMP_DEBUG_ASSERT( __kmp_init_serial );
2675
2676 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002677 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002678 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002679 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2680 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002681}
2682
2683/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2684void
2685__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2686{
2687 kmp_info_t *thread;
2688// kmp_team_t *team;
2689
2690 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2691 KMP_DEBUG_ASSERT( __kmp_init_serial );
2692
2693 // Check if the kind parameter is valid, correct if needed.
2694 // Valid parameters should fit in one of two intervals - standard or extended:
2695 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2696 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2697 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2698 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2699 {
2700 // TODO: Hint needs attention in case we change the default schedule.
2701 __kmp_msg(
2702 kmp_ms_warning,
2703 KMP_MSG( ScheduleKindOutOfRange, kind ),
2704 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2705 __kmp_msg_null
2706 );
2707 kind = kmp_sched_default;
2708 chunk = 0; // ignore chunk value in case of bad kind
2709 }
2710
2711 thread = __kmp_threads[ gtid ];
2712
2713 __kmp_save_internal_controls( thread );
2714
2715 if ( kind < kmp_sched_upper_std ) {
2716 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2717 // differentiate static chunked vs. unchunked:
2718 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002719 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002721 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002722 }
2723 } else {
2724 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002725 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002726 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2727 }
2728 if ( kind == kmp_sched_auto ) {
2729 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002730 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002732 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002733 }
2734}
2735
2736/* Gets def_sched_var ICV values */
2737void
2738__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2739{
2740 kmp_info_t *thread;
2741 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002742
2743 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2744 KMP_DEBUG_ASSERT( __kmp_init_serial );
2745
2746 thread = __kmp_threads[ gtid ];
2747
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002748 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002749
2750 switch ( th_type ) {
2751 case kmp_sch_static:
2752 case kmp_sch_static_greedy:
2753 case kmp_sch_static_balanced:
2754 *kind = kmp_sched_static;
2755 *chunk = 0; // chunk was not set, try to show this fact via zero value
2756 return;
2757 case kmp_sch_static_chunked:
2758 *kind = kmp_sched_static;
2759 break;
2760 case kmp_sch_dynamic_chunked:
2761 *kind = kmp_sched_dynamic;
2762 break;
2763 case kmp_sch_guided_chunked:
2764 case kmp_sch_guided_iterative_chunked:
2765 case kmp_sch_guided_analytical_chunked:
2766 *kind = kmp_sched_guided;
2767 break;
2768 case kmp_sch_auto:
2769 *kind = kmp_sched_auto;
2770 break;
2771 case kmp_sch_trapezoidal:
2772 *kind = kmp_sched_trapezoidal;
2773 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002774#if KMP_STATIC_STEAL_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002775 case kmp_sch_static_steal:
2776 *kind = kmp_sched_static_steal;
2777 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002778#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779 default:
2780 KMP_FATAL( UnknownSchedulingType, th_type );
2781 }
2782
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002783 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002784}
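/* Usage sketch: the kind/chunk mapping above backs the standard
   omp_set_schedule / omp_get_schedule entry points. */
#include <omp.h>
#include <stdio.h>

int main(void) {
    omp_sched_t kind;
    int chunk;

    omp_set_schedule(omp_sched_dynamic, 4);      /* run-time schedule = dynamic,4 */
    omp_get_schedule(&kind, &chunk);
    printf("kind=%d chunk=%d\n", (int)kind, chunk);

    omp_set_schedule(omp_sched_static, 0);       /* chunk <= 0 selects unchunked static */
    omp_get_schedule(&kind, &chunk);
    printf("kind=%d chunk=%d\n", (int)kind, chunk);
    return 0;
}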
2785
2786int
2787__kmp_get_ancestor_thread_num( int gtid, int level ) {
2788
2789 int ii, dd;
2790 kmp_team_t *team;
2791 kmp_info_t *thr;
2792
2793 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2794 KMP_DEBUG_ASSERT( __kmp_init_serial );
2795
2796 // validate level
2797 if( level == 0 ) return 0;
2798 if( level < 0 ) return -1;
2799 thr = __kmp_threads[ gtid ];
2800 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002801 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002802 if( level > ii ) return -1;
2803
2804#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002805 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002806 // AC: we are in teams region where multiple nested teams have same level
2807 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2808 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2809 KMP_DEBUG_ASSERT( ii >= tlevel );
2810 // AC: As we need to pass by the teams league, we need to artificially increase ii
2811 if ( ii == tlevel ) {
2812 ii += 2; // three teams have same level
2813 } else {
2814 ii ++; // two teams have same level
2815 }
2816 }
2817 }
2818#endif
2819
2820 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2821
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002822 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002823 level++;
2824 while( ii > level )
2825 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002826 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002827 {
2828 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002829 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002830 team = team->t.t_parent;
2831 continue;
2832 }
2833 if( ii > level ) {
2834 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002835 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002836 ii--;
2837 }
2838 }
2839
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002840 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002841}
2842
2843int
2844__kmp_get_team_size( int gtid, int level ) {
2845
2846 int ii, dd;
2847 kmp_team_t *team;
2848 kmp_info_t *thr;
2849
2850 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2851 KMP_DEBUG_ASSERT( __kmp_init_serial );
2852
2853 // validate level
2854 if( level == 0 ) return 1;
2855 if( level < 0 ) return -1;
2856 thr = __kmp_threads[ gtid ];
2857 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002858 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002859 if( level > ii ) return -1;
2860
2861#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002862 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002863 // AC: we are in teams region where multiple nested teams have same level
2864 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2865 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2866 KMP_DEBUG_ASSERT( ii >= tlevel );
2867 // AC: As we need to pass by the teams league, we need to artificially increase ii
2868 if ( ii == tlevel ) {
2869 ii += 2; // three teams have same level
2870 } else {
2871 ii ++; // two teams have same level
2872 }
2873 }
2874 }
2875#endif
2876
2877 while( ii > level )
2878 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002879 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002880 {
2881 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002882 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002883 team = team->t.t_parent;
2884 continue;
2885 }
2886 if( ii > level ) {
2887 team = team->t.t_parent;
2888 ii--;
2889 }
2890 }
2891
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002892 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002893}
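/* Usage sketch: the level-walking loops above back the standard
   omp_get_ancestor_thread_num / omp_get_team_size queries in nested regions. */
#include <omp.h>
#include <stdio.h>

int main(void) {
    omp_set_nested(1);
    omp_set_num_threads(2);
    #pragma omp parallel
    #pragma omp parallel num_threads(2)
    {
        #pragma omp single
        printf("level %d: ancestor tid at level 1 = %d, team size at level 1 = %d\n",
               omp_get_level(),
               omp_get_ancestor_thread_num(1),
               omp_get_team_size(1));
    }
    return 0;
}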
2894
Jim Cownie5e8470a2013-09-27 10:38:44 +00002895kmp_r_sched_t
2896__kmp_get_schedule_global() {
2897// This routine created because pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
2898// may be changed by kmp_set_defaults independently. So one can get the updated schedule here.
2899
2900 kmp_r_sched_t r_sched;
2901
2902 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2903 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2904 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2905 if ( __kmp_sched == kmp_sch_static ) {
2906 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2907 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2908 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2909 } else {
2910 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2911 }
2912
2913 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2914 r_sched.chunk = KMP_DEFAULT_CHUNK;
2915 } else {
2916 r_sched.chunk = __kmp_chunk;
2917 }
2918
2919 return r_sched;
2920}
2921
2922/* ------------------------------------------------------------------------ */
2923/* ------------------------------------------------------------------------ */
2924
2925
2926/*
2927 * Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2928 * at least argc number of *t_argv entries for the requested team.
2929 */
2930static void
2931__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2932{
2933
2934 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002935 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002936
2937 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2938 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002939 /* if previously allocated heap space for args, free them */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002940 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2941 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002942
2943 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2944 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002945 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002946 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2947 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002948 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002949 if ( __kmp_storage_map ) {
2950 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2951 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2952 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2953 "team_%d.t_inline_argv",
2954 team->t.t_id );
2955 }
2956 } else {
2957 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002958 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002959 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2960 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2961 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002962 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002963 if ( __kmp_storage_map ) {
2964 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2965 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2966 team->t.t_id );
2967 }
2968 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002969 }
2970}
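/* Illustrative sketch (all names hypothetical) of the same small-buffer-then-
   heap growth policy as __kmp_alloc_argv_entries: keep a small inline array for
   the common case and switch to a heap block of at least 2*argc entries (with a
   floor) once that overflows. */
#include <stdlib.h>

#define INLINE_ENTRIES   4
#define MIN_HEAP_ENTRIES 16

typedef struct argv_buf {
    void  *inline_argv[INLINE_ENTRIES];
    void **argv;                        /* points at inline_argv or a heap block */
    int    max_argc;
} argv_buf_t;

static void argv_reserve(argv_buf_t *b, int argc) {
    if (b->argv != NULL && argc <= b->max_argc)
        return;                                       /* current storage suffices */
    if (b->argv != NULL && b->argv != b->inline_argv)
        free(b->argv);                                /* drop the old heap block */
    if (argc <= INLINE_ENTRIES) {
        b->argv = b->inline_argv;                     /* fits in the inline space */
        b->max_argc = INLINE_ENTRIES;
    } else {
        b->max_argc = (argc <= MIN_HEAP_ENTRIES / 2) ? MIN_HEAP_ENTRIES : 2 * argc;
        b->argv = (void **)malloc(sizeof(void *) * b->max_argc);
    }
}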
2971
2972static void
2973__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2974{
2975 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002976 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002977 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2978 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002979 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002980 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002981 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002982 team->t.t_max_nproc = max_nth;
2983
2984 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002985 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002986 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002987#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00002988 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2989#endif
2990 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002991}
2992
2993static void
2994__kmp_free_team_arrays(kmp_team_t *team) {
2995 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2996 int i;
2997 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2998 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2999 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
3000 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
3001 }; // if
3002 }; // for
3003 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00003004 __kmp_free(team->t.t_disp_buffer);
3005 __kmp_free(team->t.t_dispatch);
3006 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003007 team->t.t_threads = NULL;
3008 team->t.t_disp_buffer = NULL;
3009 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003010 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003011}
3012
3013static void
3014__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3015 kmp_info_t **oldThreads = team->t.t_threads;
3016
Jonathan Peytona58563d2016-03-29 20:05:27 +00003017 __kmp_free(team->t.t_disp_buffer);
3018 __kmp_free(team->t.t_dispatch);
3019 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003020 __kmp_allocate_team_arrays(team, max_nth);
3021
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003022 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003023
3024 __kmp_free(oldThreads);
3025}
3026
3027static kmp_internal_control_t
3028__kmp_get_global_icvs( void ) {
3029
Jim Cownie5e8470a2013-09-27 10:38:44 +00003030 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003031
3032#if OMP_40_ENABLED
3033 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3034#endif /* OMP_40_ENABLED */
3035
3036 kmp_internal_control_t g_icvs = {
3037 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003038 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3039 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3040 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003041 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003042#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00003043 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003044#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003045 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3046 // (a maximum upper bound is used for this value if __kmp_parallel_initialize has not been called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003047 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3048 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003049#if OMP_40_ENABLED
3050 __kmp_nested_proc_bind.bind_types[0],
George Rokos28f31b42016-09-09 17:55:26 +00003051 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003052#endif /* OMP_40_ENABLED */
3053 NULL //struct kmp_internal_control *next;
3054 };
3055
3056 return g_icvs;
3057}
3058
3059static kmp_internal_control_t
3060__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3061
Jim Cownie5e8470a2013-09-27 10:38:44 +00003062 kmp_internal_control_t gx_icvs;
3063 gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
3064 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3065 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003066
3067 return gx_icvs;
3068}
3069
3070static void
3071__kmp_initialize_root( kmp_root_t *root )
3072{
3073 int f;
3074 kmp_team_t *root_team;
3075 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003076 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003077 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3078 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003079 KMP_DEBUG_ASSERT( root );
3080 KMP_ASSERT( ! root->r.r_begin );
3081
3082 /* setup the root state structure */
3083 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003084 root->r.r_begin = FALSE;
3085 root->r.r_active = FALSE;
3086 root->r.r_in_parallel = 0;
3087 root->r.r_blocktime = __kmp_dflt_blocktime;
3088 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003089
3090 /* setup the root team for this task */
3091 /* allocate the root team structure */
3092 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003093
Jim Cownie5e8470a2013-09-27 10:38:44 +00003094 root_team =
3095 __kmp_allocate_team(
3096 root,
3097 1, // new_nproc
3098 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003099#if OMPT_SUPPORT
3100 0, // root parallel id
3101#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003102#if OMP_40_ENABLED
3103 __kmp_nested_proc_bind.bind_types[0],
3104#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003105 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003106 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003107 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003108 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003109#if USE_DEBUGGER
3110 // Non-NULL value should be assigned to make the debugger display the root team.
3111 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3112#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003113
3114 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3115
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003116 root->r.r_root_team = root_team;
3117 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003118
3119 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003120 root_team->t.t_threads[0] = NULL;
3121 root_team->t.t_nproc = 1;
3122 root_team->t.t_serialized = 1;
3123 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3124 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3125 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003126 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3127 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3128
3129 /* setup the hot team for this task */
3130 /* allocate the hot team structure */
3131 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003132
Jim Cownie5e8470a2013-09-27 10:38:44 +00003133 hot_team =
3134 __kmp_allocate_team(
3135 root,
3136 1, // new_nproc
3137 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003138#if OMPT_SUPPORT
3139 0, // root parallel id
3140#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003141#if OMP_40_ENABLED
3142 __kmp_nested_proc_bind.bind_types[0],
3143#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003144 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003145 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003146 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003147 );
3148 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3149
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003150 root->r.r_hot_team = hot_team;
3151 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003152
3153 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003154 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003155
3156 /* initialize hot team */
3157 hot_team_max_nth = hot_team->t.t_max_nproc;
3158 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003159 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003160 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003161 hot_team->t.t_nproc = 1;
3162 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3163 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3164 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003165 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003166}
3167
3168#ifdef KMP_DEBUG
3169
3170
3171typedef struct kmp_team_list_item {
3172 kmp_team_p const * entry;
3173 struct kmp_team_list_item * next;
3174} kmp_team_list_item_t;
3175typedef kmp_team_list_item_t * kmp_team_list_t;
3176
3177
3178static void
3179__kmp_print_structure_team_accum( // Add team to list of teams.
3180 kmp_team_list_t list, // List of teams.
3181 kmp_team_p const * team // Team to add.
3182) {
3183
3184 // List must terminate with item where both entry and next are NULL.
3185 // Team is added to the list only once.
3186 // List is sorted in ascending order by team id.
3187 // Team id is *not* a key.
3188
3189 kmp_team_list_t l;
3190
3191 KMP_DEBUG_ASSERT( list != NULL );
3192 if ( team == NULL ) {
3193 return;
3194 }; // if
3195
3196 __kmp_print_structure_team_accum( list, team->t.t_parent );
3197 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3198
3199 // Search list for the team.
3200 l = list;
3201 while ( l->next != NULL && l->entry != team ) {
3202 l = l->next;
3203 }; // while
3204 if ( l->next != NULL ) {
3205 return; // Team has been added before, exit.
3206 }; // if
3207
3208 // Team is not found. Search list again for insertion point.
3209 l = list;
3210 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3211 l = l->next;
3212 }; // while
3213
3214 // Insert team.
3215 {
3216 kmp_team_list_item_t * item =
3217 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3218 * item = * l;
3219 l->entry = team;
3220 l->next = item;
3221 }
3222
3223}
3224
3225static void
3226__kmp_print_structure_team(
3227 char const * title,
3228 kmp_team_p const * team
3229
3230) {
3231 __kmp_printf( "%s", title );
3232 if ( team != NULL ) {
3233 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3234 } else {
3235 __kmp_printf( " - (nil)\n" );
3236 }; // if
3237}
3238
3239static void
3240__kmp_print_structure_thread(
3241 char const * title,
3242 kmp_info_p const * thread
3243
3244) {
3245 __kmp_printf( "%s", title );
3246 if ( thread != NULL ) {
3247 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3248 } else {
3249 __kmp_printf( " - (nil)\n" );
3250 }; // if
3251}
3252
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003253void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003254__kmp_print_structure(
3255 void
3256) {
3257
3258 kmp_team_list_t list;
3259
3260 // Initialize list of teams.
3261 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3262 list->entry = NULL;
3263 list->next = NULL;
3264
3265 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3266 {
3267 int gtid;
3268 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3269 __kmp_printf( "%2d", gtid );
3270 if ( __kmp_threads != NULL ) {
3271 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3272 }; // if
3273 if ( __kmp_root != NULL ) {
3274 __kmp_printf( " %p", __kmp_root[ gtid ] );
3275 }; // if
3276 __kmp_printf( "\n" );
3277 }; // for gtid
3278 }
3279
3280 // Print out __kmp_threads array.
3281 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3282 if ( __kmp_threads != NULL ) {
3283 int gtid;
3284 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3285 kmp_info_t const * thread = __kmp_threads[ gtid ];
3286 if ( thread != NULL ) {
3287 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3288 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3289 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3290 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3291 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3292 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3293 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3294 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3295#if OMP_40_ENABLED
3296 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3297#endif
3298 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3299 __kmp_printf( "\n" );
3300 __kmp_print_structure_team_accum( list, thread->th.th_team );
3301 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3302 }; // if
3303 }; // for gtid
3304 } else {
3305 __kmp_printf( "Threads array is not allocated.\n" );
3306 }; // if
3307
3308 // Print out __kmp_root array.
3309 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3310 if ( __kmp_root != NULL ) {
3311 int gtid;
3312 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3313 kmp_root_t const * root = __kmp_root[ gtid ];
3314 if ( root != NULL ) {
3315 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3316 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3317 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3318 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3319 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3320 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3321 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3322 __kmp_printf( "\n" );
3323 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3324 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3325 }; // if
3326 }; // for gtid
3327 } else {
3328 __kmp_printf( "Ubers array is not allocated.\n" );
3329 }; // if
3330
3331 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3332 while ( list->next != NULL ) {
3333 kmp_team_p const * team = list->entry;
3334 int i;
3335 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3336 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3337 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3338 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3339 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3340 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3341 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3342 __kmp_printf( " Thread %2d: ", i );
3343 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3344 }; // for i
3345 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3346 __kmp_printf( "\n" );
3347 list = list->next;
3348 }; // while
3349
3350 // Print out __kmp_thread_pool and __kmp_team_pool.
3351 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3352 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3353 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3354 __kmp_printf( "\n" );
3355
3356 // Free team list.
3357 while ( list != NULL ) {
3358 kmp_team_list_item_t * item = list;
3359 list = list->next;
3360 KMP_INTERNAL_FREE( item );
3361 }; // while
3362
3363}
3364
3365#endif
3366
3367
3368//---------------------------------------------------------------------------
3369// Stuff for per-thread fast random number generator
3370// Table of primes
3371
3372static const unsigned __kmp_primes[] = {
3373 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3374 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3375 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3376 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3377 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3378 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3379 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3380 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3381 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3382 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3383 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3384 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3385 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3386 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3387 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3388 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3389};
3390
3391//---------------------------------------------------------------------------
3392// __kmp_get_random: Get a random number using a linear congruential method.
3393
3394unsigned short
3395__kmp_get_random( kmp_info_t * thread )
3396{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003397 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003398 unsigned short r = x>>16;
3399
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003400 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401
3402 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3403 thread->th.th_info.ds.ds_tid, r) );
3404
3405 return r;
3406}
3407//--------------------------------------------------------
3408// __kmp_init_random: Initialize a random number generator
3409
3410void
3411__kmp_init_random( kmp_info_t * thread )
3412{
3413 unsigned seed = thread->th.th_info.ds.ds_tid;
3414
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003415 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3416 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3417 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003418}
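/* Illustrative sketch (all names hypothetical) of the per-thread linear
   congruential generator above: each thread picks a multiplier 'a' from a table
   of primes keyed by its tid, seeds x = (tid+1)*a + 1, and on every call returns
   the high 16 bits of x before advancing x = x*a + 1. */
#include <stdio.h>

static const unsigned demo_primes[] = { 0x9e3779b1u, 0xffe6cc59u, 0x2109f6ddu, 0x43977ab5u };

typedef struct { unsigned a, x; } demo_rng_t;

static void demo_rng_init(demo_rng_t *r, unsigned tid) {
    r->a = demo_primes[tid % (sizeof(demo_primes) / sizeof(demo_primes[0]))];
    r->x = (tid + 1) * r->a + 1;
}

static unsigned short demo_rng_next(demo_rng_t *r) {
    unsigned short out = (unsigned short)(r->x >> 16);  /* high bits of old state */
    r->x = r->x * r->a + 1;                             /* advance the state */
    return out;
}

int main(void) {
    demo_rng_t r;
    demo_rng_init(&r, 3);                               /* pretend this is tid 3 */
    for (int i = 0; i < 4; ++i)
        printf("%u\n", (unsigned)demo_rng_next(&r));
    return 0;
}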
3419
3420
3421#if KMP_OS_WINDOWS
3422/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3423static int
3424__kmp_reclaim_dead_roots(void) {
3425 int i, r = 0;
3426
3427 for(i = 0; i < __kmp_threads_capacity; ++i) {
3428 if( KMP_UBER_GTID( i ) &&
3429 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3430 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
3431 r += __kmp_unregister_root_other_thread(i);
3432 }
3433 }
3434 return r;
3435}
3436#endif
3437
3438/*
3439 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3440 free entries generated.
3441
3442 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3443 already dead.
3444
3445 On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate
3446 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3447 __kmp_tp_capacity, if threadprivate cache array has been created.
3448 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3449
3450 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3451 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3452 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3453 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3454 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3455 as many free slots as possible up to nWish.
3456
3457 If any argument is negative, the behavior is undefined.
3458*/
3459static int
3460__kmp_expand_threads(int nWish, int nNeed) {
3461 int added = 0;
3462 int old_tp_cached;
3463 int __kmp_actual_max_nth;
3464
3465 if(nNeed > nWish) /* normalize the arguments */
3466 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003467#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003468/* only for Windows static library */
3469 /* reclaim array entries for root threads that are already dead */
3470 added = __kmp_reclaim_dead_roots();
3471
3472 if(nNeed) {
3473 nNeed -= added;
3474 if(nNeed < 0)
3475 nNeed = 0;
3476 }
3477 if(nWish) {
3478 nWish -= added;
3479 if(nWish < 0)
3480 nWish = 0;
3481 }
3482#endif
3483 if(nWish <= 0)
3484 return added;
3485
3486 while(1) {
3487 int nTarget;
3488 int minimumRequiredCapacity;
3489 int newCapacity;
3490 kmp_info_t **newThreads;
3491 kmp_root_t **newRoot;
3492
3493 //
3494 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3495 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3496 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3497 // become > __kmp_max_nth in one of two ways:
3498 //
3499 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3500 // may not be reused by another thread, so we may need to increase
3501 // __kmp_threads_capacity to __kmp_max_threads + 1.
3502 //
3503 // 2) New foreign root(s) are encountered. We always register new
3504 // foreign roots. This may cause a smaller # of threads to be
3505 // allocated at subsequent parallel regions, but the worker threads
3506 // hang around (and eventually go to sleep) and need slots in the
3507 // __kmp_threads[] array.
3508 //
3509 // Anyway, that is the reason for moving the check to see if
3510 // __kmp_max_threads was exceeded into __kmp_reserve_threads()
3511 // instead of having it performed here. -BB
3512 //
3513 old_tp_cached = __kmp_tp_cached;
3514 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3515 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3516
3517 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3518 nTarget = nWish;
3519 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3520 /* can't fulfil nWish, so try nNeed */
3521 if(nNeed) {
3522 nTarget = nNeed;
3523 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3524 /* possible expansion too small -- give up */
3525 break;
3526 }
3527 } else {
3528 /* best-effort */
3529 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3530 if(!nTarget) {
3531 /* can't expand at all -- give up */
3532 break;
3533 }
3534 }
3535 }
3536 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3537
3538 newCapacity = __kmp_threads_capacity;
3539 do{
3540 newCapacity =
3541 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3542 (newCapacity << 1) :
3543 __kmp_actual_max_nth;
3544 } while(newCapacity < minimumRequiredCapacity);
3545 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3546 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003547 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3548 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003549 memset(newThreads + __kmp_threads_capacity, 0,
3550 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3551 memset(newRoot + __kmp_threads_capacity, 0,
3552 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3553
3554 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3555 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3556 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3557 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3558 of a double-check pair.
3559 */
3560 __kmp_free(newThreads);
3561 continue; /* start over and try again */
3562 }
3563 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3564 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3565 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3566 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3567 __kmp_free(newThreads);
3568 continue; /* start over and try again */
3569 } else {
3570 /* success */
3571 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated.
3572 //
3573 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3574 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3575 added += newCapacity - __kmp_threads_capacity;
3576 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3577 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003578 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003579 }
3580 }
3581 return added;
3582}
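/* Illustrative sketch (hypothetical name) of the capacity computation in the
   loop above: double the current capacity until it covers the required minimum,
   clipping at an absolute ceiling. Assumes current >= 1 and that the caller has
   already verified required_min <= ceiling, as __kmp_expand_threads does with
   its headroom test. */
static int grow_capacity(int current, int required_min, int ceiling) {
    int cap = current;
    do {
        cap = (cap <= (ceiling >> 1)) ? (cap << 1) : ceiling;
    } while (cap < required_min);
    return cap;
}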
3583
3584/* register the current thread as a root thread and obtain our gtid */
3585/* we must have the __kmp_initz_lock held at this point */
3586/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
3587int
3588__kmp_register_root( int initial_thread )
3589{
3590 kmp_info_t *root_thread;
3591 kmp_root_t *root;
3592 int gtid;
3593 int capacity;
3594 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3595 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3596 KMP_MB();
3597
3598
3599 /*
3600 2007-03-02:
3601
3602 If the initial thread did not invoke the OpenMP RTL yet, and this thread is not an initial one,
3603 the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
3604 return false (meaning there is at least one empty slot in the __kmp_threads array), but it
3605 is possible that the only free slot is #0, which is reserved for the initial thread and so cannot be
3606 used for this one. The following code works around this bug.
3607
3608 However, the right solution seems to be not to reserve slot #0 for the initial thread, because:
3609 (1) there is no magic in slot #0,
3610 (2) we cannot reliably detect the initial thread (the first thread which does serial
3611 initialization may not be the real initial thread).
3612 */
3613 capacity = __kmp_threads_capacity;
3614 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3615 -- capacity;
3616 }; // if
3617
3618 /* see if there are too many threads */
3619 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3620 if ( __kmp_tp_cached ) {
3621 __kmp_msg(
3622 kmp_ms_fatal,
3623 KMP_MSG( CantRegisterNewThread ),
3624 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3625 KMP_HNT( PossibleSystemLimitOnThreads ),
3626 __kmp_msg_null
3627 );
3628 }
3629 else {
3630 __kmp_msg(
3631 kmp_ms_fatal,
3632 KMP_MSG( CantRegisterNewThread ),
3633 KMP_HNT( SystemLimitOnThreads ),
3634 __kmp_msg_null
3635 );
3636 }
3637 }; // if
3638
3639 /* find an available thread slot */
3640 /* Don't reassign the zero slot since we need that to only be used by initial
3641 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003642 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3643 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003644 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3645 KMP_ASSERT( gtid < __kmp_threads_capacity );
3646
3647 /* update global accounting */
3648 __kmp_all_nth ++;
3649 TCW_4(__kmp_nth, __kmp_nth + 1);
3650
3651 //
3652 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3653 // for low numbers of procs, and method #2 (keyed API call) for higher
3654 // numbers of procs.
3655 //
3656 if ( __kmp_adjust_gtid_mode ) {
3657 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3658 if ( TCR_4(__kmp_gtid_mode) != 2) {
3659 TCW_4(__kmp_gtid_mode, 2);
3660 }
3661 }
3662 else {
3663 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3664 TCW_4(__kmp_gtid_mode, 1);
3665 }
3666 }
3667 }
3668
3669#ifdef KMP_ADJUST_BLOCKTIME
3670 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003671 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003672 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3673 if ( __kmp_nth > __kmp_avail_proc ) {
3674 __kmp_zero_bt = TRUE;
3675 }
3676 }
3677#endif /* KMP_ADJUST_BLOCKTIME */
3678
3679 /* setup this new hierarchy */
3680 if( ! ( root = __kmp_root[gtid] )) {
3681 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3682 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3683 }
3684
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003685#if KMP_STATS_ENABLED
3686 // Initialize stats as soon as possible (right after gtid assignment).
3687 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3688 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3689 KMP_SET_THREAD_STATE(SERIAL_REGION);
3690 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3691#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003692 __kmp_initialize_root( root );
3693
3694 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003695 if( root->r.r_uber_thread ) {
3696 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003697 } else {
3698 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3699 if ( __kmp_storage_map ) {
3700 __kmp_print_thread_storage_map( root_thread, gtid );
3701 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003702 root_thread->th.th_info .ds.ds_gtid = gtid;
3703 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003704 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003705 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003706 }
3707 #if USE_FAST_MEMORY
3708 __kmp_initialize_fast_memory( root_thread );
3709 #endif /* USE_FAST_MEMORY */
3710
3711 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003712 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003713 __kmp_initialize_bget( root_thread );
3714 #endif
3715 __kmp_init_random( root_thread ); // Initialize random number generator
3716 }
3717
3718 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003719 if( ! root_thread->th.th_serial_team ) {
3720 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003721 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003722
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003723 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003724#if OMPT_SUPPORT
3725 0, // root parallel id
3726#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003727#if OMP_40_ENABLED
3728 proc_bind_default,
3729#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003730 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003731 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003732 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003733 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003734 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003735 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003736
3737 /* drop root_thread into place */
3738 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3739
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003740 root->r.r_root_team->t.t_threads[0] = root_thread;
3741 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3742 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
 3743 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
3744 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003745
3746 /* initialize the thread, get it ready to go */
3747 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003748 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003749
3750 /* prepare the master thread for get_gtid() */
3751 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003752
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003753#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003754 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003755#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003756
Jim Cownie5e8470a2013-09-27 10:38:44 +00003757 #ifdef KMP_TDATA_GTID
3758 __kmp_gtid = gtid;
3759 #endif
3760 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3761 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003762
3763 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3764 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003765 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003766 KMP_INIT_BARRIER_STATE ) );
3767 { // Initialize barrier data.
3768 int b;
3769 for ( b = 0; b < bs_last_barrier; ++ b ) {
3770 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003771#if USE_DEBUGGER
3772 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3773#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003774 }; // for
3775 }
3776 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3777
Alp Toker763b9392014-02-28 09:42:41 +00003778#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003779# if OMP_40_ENABLED
3780 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3781 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3782 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3783 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3784# endif
3785
Jim Cownie5e8470a2013-09-27 10:38:44 +00003786 if ( TCR_4(__kmp_init_middle) ) {
3787 __kmp_affinity_set_init_mask( gtid, TRUE );
3788 }
Alp Toker763b9392014-02-28 09:42:41 +00003789#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003790
3791 __kmp_root_counter ++;
3792
3793 KMP_MB();
3794 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3795
3796 return gtid;
3797}
3798
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003799#if KMP_NESTED_HOT_TEAMS
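// Recursively frees the nested hot teams kept below 'thr', starting at 'level': each
// level's hot team is released together with its worker threads, and the return value
// is the number of __kmp_threads entries freed (team masters are not counted, matching
// the "master is not freed" accounting below).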
3800static int
3801__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3802{
3803 int i, n, nth;
3804 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3805 if( !hot_teams || !hot_teams[level].hot_team ) {
3806 return 0;
3807 }
3808 KMP_DEBUG_ASSERT( level < max_level );
3809 kmp_team_t *team = hot_teams[level].hot_team;
3810 nth = hot_teams[level].hot_team_nth;
3811 n = nth - 1; // master is not freed
3812 if( level < max_level - 1 ) {
3813 for( i = 0; i < nth; ++i ) {
3814 kmp_info_t *th = team->t.t_threads[i];
3815 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3816 if( i > 0 && th->th.th_hot_teams ) {
3817 __kmp_free( th->th.th_hot_teams );
3818 th->th.th_hot_teams = NULL;
3819 }
3820 }
3821 }
3822 __kmp_free_team( root, team, NULL );
3823 return n;
3824}
3825#endif
3826
Jim Cownie5e8470a2013-09-27 10:38:44 +00003827/* Resets a root thread and clears its root and hot teams.
3828 Returns the number of __kmp_threads entries directly and indirectly freed.
3829*/
3830static int
3831__kmp_reset_root(int gtid, kmp_root_t *root)
3832{
3833 kmp_team_t * root_team = root->r.r_root_team;
3834 kmp_team_t * hot_team = root->r.r_hot_team;
3835 int n = hot_team->t.t_nproc;
3836 int i;
3837
3838 KMP_DEBUG_ASSERT( ! root->r.r_active );
3839
3840 root->r.r_root_team = NULL;
3841 root->r.r_hot_team = NULL;
 3842 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before calling
 3843 // __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003844 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3845#if KMP_NESTED_HOT_TEAMS
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003846 if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003847 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3848 kmp_info_t *th = hot_team->t.t_threads[i];
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003849 if( __kmp_hot_teams_max_level > 1 ) {
3850 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3851 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003852 if( th->th.th_hot_teams ) {
3853 __kmp_free( th->th.th_hot_teams );
3854 th->th.th_hot_teams = NULL;
3855 }
3856 }
3857 }
3858#endif
3859 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003860
Jim Cownie5e8470a2013-09-27 10:38:44 +00003861 //
3862 // Before we can reap the thread, we need to make certain that all
3863 // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
3864 //
3865 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3866 __kmp_wait_to_unref_task_teams();
3867 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003868
3869 #if KMP_OS_WINDOWS
3870 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3871 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3872 (LPVOID)&(root->r.r_uber_thread->th),
3873 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3874 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3875 #endif /* KMP_OS_WINDOWS */
3876
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003877#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003878 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003879 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3880 int gtid = __kmp_get_gtid();
3881 __ompt_thread_end(ompt_thread_initial, gtid);
3882 }
3883#endif
3884
Jim Cownie5e8470a2013-09-27 10:38:44 +00003885 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3886 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3887
 3888 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3889 root->r.r_uber_thread = NULL;
3890 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003891 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003892
3893 return n;
3894}
3895
3896void
3897__kmp_unregister_root_current_thread( int gtid )
3898{
Jim Cownie77c2a632014-09-03 11:34:33 +00003899 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003900 /* this lock should be ok, since unregister_root_current_thread is never called during
 3901 * an abort, only during a normal close. furthermore, if you have the
3902 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003903
3904 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3905 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3906 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3907 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3908 return;
3909 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003910 kmp_root_t *root = __kmp_root[gtid];
3911
Jim Cownie5e8470a2013-09-27 10:38:44 +00003912 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3913 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3914 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3915 KMP_ASSERT( root->r.r_active == FALSE );
3916
Jim Cownie5e8470a2013-09-27 10:38:44 +00003917
3918 KMP_MB();
3919
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003920#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003921 kmp_info_t * thread = __kmp_threads[gtid];
3922 kmp_team_t * team = thread->th.th_team;
3923 kmp_task_team_t * task_team = thread->th.th_task_team;
3924
3925 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003926 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3927#if OMPT_SUPPORT
3928 // the runtime is shutting down so we won't report any events
3929 thread->th.ompt_thread_info.state = ompt_state_undefined;
3930#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003931 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003932 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003933#endif
3934
Jim Cownie5e8470a2013-09-27 10:38:44 +00003935 __kmp_reset_root(gtid, root);
3936
3937 /* free up this thread slot */
3938 __kmp_gtid_set_specific( KMP_GTID_DNE );
3939#ifdef KMP_TDATA_GTID
3940 __kmp_gtid = KMP_GTID_DNE;
3941#endif
3942
3943 KMP_MB();
3944 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3945
3946 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3947}
3948
Jonathan Peyton2321d572015-06-08 19:25:25 +00003949#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003950/* __kmp_forkjoin_lock must be already held
3951 Unregisters a root thread that is not the current thread. Returns the number of
3952 __kmp_threads entries freed as a result.
3953 */
3954static int
3955__kmp_unregister_root_other_thread( int gtid )
3956{
3957 kmp_root_t *root = __kmp_root[gtid];
3958 int r;
3959
3960 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3961 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3962 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3963 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3964 KMP_ASSERT( root->r.r_active == FALSE );
3965
3966 r = __kmp_reset_root(gtid, root);
3967 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3968 return r;
3969}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003970#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003971
Jim Cownie5e8470a2013-09-27 10:38:44 +00003972#if KMP_DEBUG
3973void __kmp_task_info() {
3974
3975 kmp_int32 gtid = __kmp_entry_gtid();
3976 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3977 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003978 kmp_team_t *steam = this_thr->th.th_serial_team;
3979 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003980
3981 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3982 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3983}
3984#endif // KMP_DEBUG
3985
Jim Cownie5e8470a2013-09-27 10:38:44 +00003986/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003987 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003988 * initialization of features as much as possible */
3989static void
3990__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3991{
3992 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3993 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003994 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003995 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003996 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003997 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003998 KMP_DEBUG_ASSERT( team->t.t_threads );
3999 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4000 KMP_DEBUG_ASSERT( master );
4001 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004002
4003 KMP_MB();
4004
4005 TCW_SYNC_PTR(this_thr->th.th_team, team);
4006
4007 this_thr->th.th_info.ds.ds_tid = tid;
4008 this_thr->th.th_set_nproc = 0;
Andrey Churbanov581490e2017-02-06 18:53:32 +00004009 if (__kmp_tasking_mode != tskm_immediate_exec)
4010 // When tasking is possible, threads are not safe to reap until they are
4011 // done tasking; this will be set when tasking code is exited in wait
4012 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4013 else // no tasking --> always safe to reap
4014 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004015#if OMP_40_ENABLED
4016 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00004017# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004018 this_thr->th.th_new_place = this_thr->th.th_current_place;
4019# endif
4020#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004021 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004022
4023 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004024 this_thr->th.th_team_nproc = team->t.t_nproc;
4025 this_thr->th.th_team_master = master;
4026 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004027 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4028
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004029 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004030
4031 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4032 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4033
4034 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4035
4036 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4037 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4038 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004039
4040 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004041 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004042
4043 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004044
4045#ifdef BUILD_TV
4046 this_thr->th.th_local.tv_data = 0;
4047#endif
4048
4049 if ( ! this_thr->th.th_pri_common ) {
4050 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4051 if ( __kmp_storage_map ) {
4052 __kmp_print_storage_map_gtid(
4053 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4054 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4055 );
4056 }; // if
4057 this_thr->th.th_pri_head = NULL;
4058 }; // if
4059
4060 /* Initialize dynamic dispatch */
4061 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004062 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004063 /*
4064 * Use team max_nproc since this will never change for the team.
4065 */
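        // A serialized team (t_max_nproc == 1) only ever needs a single dispatch buffer;
        // otherwise __kmp_dispatch_num_buffers buffers are allocated so consecutive
        // dynamically scheduled loops can cycle through separate buffers (th_disp_index,
        // initialized below, selects the current one).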
4066 size_t disp_size = sizeof( dispatch_private_info_t ) *
Jonathan Peyton067325f2016-05-31 19:01:15 +00004067 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004068 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4069 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004070 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004071 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4072
4073 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004074#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00004075 dispatch->th_doacross_buf_idx = 0;
4076#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004077 if( ! dispatch->th_disp_buffer ) {
4078 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004079
4080 if ( __kmp_storage_map ) {
4081 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
Jonathan Peyton067325f2016-05-31 19:01:15 +00004082 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
Jim Cownie5e8470a2013-09-27 10:38:44 +00004083 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4084 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4085 gtid, team->t.t_id, gtid );
4086 }
4087 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004088 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004089 }
4090
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004091 dispatch->th_dispatch_pr_current = 0;
4092 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004093
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004094 dispatch->th_deo_fcn = 0; /* ORDERED */
4095 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004096 }
4097
4098 this_thr->th.th_next_pool = NULL;
4099
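    // th_task_state_memo_stack saves th_task_state per nested (hot team) level; it is read
    // back in __kmp_allocate_team() when a nested hot team is resized. Start with a small,
    // zero-initialized stack of 4 entries.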
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004100 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004101 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004102 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4103 this_thr->th.th_task_state_top = 0;
4104 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004105 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4106 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004107 }
4108
Jim Cownie5e8470a2013-09-27 10:38:44 +00004109 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4110 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4111
4112 KMP_MB();
4113}
4114
4115
4116/* allocate a new thread for the requesting team. this is only called from within a
4117 * forkjoin critical section. we will first try to get an available thread from the
4118 * thread pool. if none is available, we will fork a new one assuming we are able
4119 * to create a new one. this should be assured, as the caller should check on this
4120 * first.
4121 */
4122kmp_info_t *
4123__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4124{
4125 kmp_team_t *serial_team;
4126 kmp_info_t *new_thr;
4127 int new_gtid;
4128
4129 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4130 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004131#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004132 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004133#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004134 KMP_MB();
4135
4136 /* first, try to get one from the thread pool */
4137 if ( __kmp_thread_pool ) {
4138
4139 new_thr = (kmp_info_t*)__kmp_thread_pool;
4140 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4141 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4142 __kmp_thread_pool_insert_pt = NULL;
4143 }
4144 TCW_4(new_thr->th.th_in_pool, FALSE);
4145 //
4146 // Don't touch th_active_in_pool or th_active.
4147 // The worker thread adjusts those flags as it sleeps/awakens.
4148 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004149 __kmp_thread_pool_nth--;
4150
4151 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4152 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004153 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004154 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4155 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4156
4157 /* setup the thread structure */
4158 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4159 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4160
4161 TCW_4(__kmp_nth, __kmp_nth + 1);
4162
Jonathan Peyton54127982015-11-04 21:37:48 +00004163 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004164 new_thr->th.th_task_state_top = 0;
4165 new_thr->th.th_task_state_stack_sz = 4;
4166
Jim Cownie5e8470a2013-09-27 10:38:44 +00004167#ifdef KMP_ADJUST_BLOCKTIME
 4168 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004169 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004170 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4171 if ( __kmp_nth > __kmp_avail_proc ) {
4172 __kmp_zero_bt = TRUE;
4173 }
4174 }
4175#endif /* KMP_ADJUST_BLOCKTIME */
4176
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004177#if KMP_DEBUG
4178 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
4179 int b;
4180 kmp_balign_t * balign = new_thr->th.th_bar;
4181 for( b = 0; b < bs_last_barrier; ++ b )
4182 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4183#endif
4184
Jim Cownie5e8470a2013-09-27 10:38:44 +00004185 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4186 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4187
4188 KMP_MB();
4189 return new_thr;
4190 }
4191
4192
 4193 /* no, we'll fork a new one */
4194 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4195 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4196
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004197#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00004198 //
4199 // If this is the first worker thread the RTL is creating, then also
4200 // launch the monitor thread. We try to do this as early as possible.
4201 //
4202 if ( ! TCR_4( __kmp_init_monitor ) ) {
4203 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4204 if ( ! TCR_4( __kmp_init_monitor ) ) {
4205 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4206 TCW_4( __kmp_init_monitor, 1 );
4207 __kmp_create_monitor( & __kmp_monitor );
4208 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004209 #if KMP_OS_WINDOWS
4210 // AC: wait until monitor has started. This is a fix for CQ232808.
4211 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
 4212 // work in between, then there is a high probability that the monitor thread starts only after
 4213 // the library has shut down. At shutdown it is too late to cope with the problem, because
 4214 // when the master is in DllMain (process detach) the monitor has no chance to start
 4215 // (it is blocked), and the master has no means to inform the monitor that the library has gone,
4216 // because all the memory which the monitor can access is going to be released/reset.
4217 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4218 KMP_YIELD( TRUE );
4219 }
4220 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4221 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004222 }
4223 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4224 }
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004225#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004226
4227 KMP_MB();
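    // Find the lowest unused __kmp_threads slot for the new worker. The scan starts at 1
    // because slot 0 belongs to the initial root thread, and the capacity assert above
    // guarantees that a free slot exists.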
4228 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4229 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4230 }
4231
4232 /* allocate space for it. */
4233 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4234
4235 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4236
4237 if ( __kmp_storage_map ) {
4238 __kmp_print_thread_storage_map( new_thr, new_gtid );
4239 }
4240
4241 /* add the reserve serialized team, initialized from the team's master thread */
4242 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004243 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004244 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004245
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004246 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004247 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004248#if OMPT_SUPPORT
4249 0, // root parallel id
4250#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004251#if OMP_40_ENABLED
4252 proc_bind_default,
4253#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004254 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004255 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004256 }
4257 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004258 serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
4259 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004260 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4261 new_thr ) );
4262
4263 /* setup the thread structures */
4264 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4265
4266 #if USE_FAST_MEMORY
4267 __kmp_initialize_fast_memory( new_thr );
4268 #endif /* USE_FAST_MEMORY */
4269
4270 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004271 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004272 __kmp_initialize_bget( new_thr );
4273 #endif
4274
4275 __kmp_init_random( new_thr ); // Initialize random number generator
4276
4277 /* Initialize these only once when thread is grabbed for a team allocation */
4278 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4279 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4280
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004281 int b;
4282 kmp_balign_t * balign = new_thr->th.th_bar;
4283 for(b=0; b<bs_last_barrier; ++b) {
4284 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4285 balign[b].bb.team = NULL;
4286 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4287 balign[b].bb.use_oncore_barrier = 0;
4288 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004289
4290 new_thr->th.th_spin_here = FALSE;
4291 new_thr->th.th_next_waiting = 0;
4292
Alp Toker98758b02014-03-02 04:12:06 +00004293#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004294 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4295 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4296 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4297 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4298#endif
4299
4300 TCW_4(new_thr->th.th_in_pool, FALSE);
4301 new_thr->th.th_active_in_pool = FALSE;
4302 TCW_4(new_thr->th.th_active, TRUE);
4303
4304 /* adjust the global counters */
4305 __kmp_all_nth ++;
4306 __kmp_nth ++;
4307
4308 //
4309 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4310 // for low numbers of procs, and method #2 (keyed API call) for higher
4311 // numbers of procs.
4312 //
4313 if ( __kmp_adjust_gtid_mode ) {
4314 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4315 if ( TCR_4(__kmp_gtid_mode) != 2) {
4316 TCW_4(__kmp_gtid_mode, 2);
4317 }
4318 }
4319 else {
4320 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4321 TCW_4(__kmp_gtid_mode, 1);
4322 }
4323 }
4324 }
4325
4326#ifdef KMP_ADJUST_BLOCKTIME
4327 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004328 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004329 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4330 if ( __kmp_nth > __kmp_avail_proc ) {
4331 __kmp_zero_bt = TRUE;
4332 }
4333 }
4334#endif /* KMP_ADJUST_BLOCKTIME */
4335
4336 /* actually fork it and create the new worker thread */
4337 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4338 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4339 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4340
Jim Cownie5e8470a2013-09-27 10:38:44 +00004341 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4342 KMP_MB();
4343 return new_thr;
4344}
4345
4346/*
4347 * reinitialize team for reuse.
4348 *
 4349 * The hot team code calls this routine at every fork barrier, so EPCC barrier
 4350 * tests are extremely sensitive to changes in it, esp. writes to the team
4351 * struct, which cause a cache invalidation in all threads.
4352 *
4353 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4354 */
4355static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004356__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004357 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4358 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004359 KMP_DEBUG_ASSERT( team && new_icvs);
4360 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004361 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004362
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004363 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004364
Jim Cownie181b4bb2013-12-23 17:28:57 +00004365 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004366 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004367 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004368
4369 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4370 team->t.t_threads[0], team ) );
4371}
4372
Jim Cownie5e8470a2013-09-27 10:38:44 +00004373
4374/* initialize the team data structure
4375 * this assumes the t_threads and t_max_nproc are already set
4376 * also, we don't touch the arguments */
4377static void
4378__kmp_initialize_team(
4379 kmp_team_t * team,
4380 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004381 kmp_internal_control_t * new_icvs,
4382 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004383) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004384 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4385
Jim Cownie5e8470a2013-09-27 10:38:44 +00004386 /* verify */
4387 KMP_DEBUG_ASSERT( team );
4388 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4389 KMP_DEBUG_ASSERT( team->t.t_threads );
4390 KMP_MB();
4391
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004392 team->t.t_master_tid = 0; /* not needed */
4393 /* team->t.t_master_bar; not needed */
4394 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4395 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004396
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004397 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4398 team->t.t_next_pool = NULL;
4399 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004400
4401 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004402 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004403
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004404 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4405 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004406
4407#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004408 team->t.t_fp_control_saved = FALSE; /* not needed */
4409 team->t.t_x87_fpu_control_word = 0; /* not needed */
4410 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004411#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4412
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004413 team->t.t_construct = 0;
4414 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004415
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004416 team->t.t_ordered .dt.t_value = 0;
4417 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004418
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004419 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004420
4421#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004422 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004423#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004424 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004425
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004426 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004427
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004428 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004429
4430 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004431 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004432}
4433
Alp Toker98758b02014-03-02 04:12:06 +00004434#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004435/* Sets full mask for thread and returns old mask, no changes to structures. */
4436static void
4437__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4438{
4439 if ( KMP_AFFINITY_CAPABLE() ) {
4440 int status;
4441 if ( old_mask != NULL ) {
4442 status = __kmp_get_system_affinity( old_mask, TRUE );
4443 int error = errno;
4444 if ( status != 0 ) {
4445 __kmp_msg(
4446 kmp_ms_fatal,
4447 KMP_MSG( ChangeThreadAffMaskError ),
4448 KMP_ERR( error ),
4449 __kmp_msg_null
4450 );
4451 }
4452 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004453 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004454 }
4455}
4456#endif
4457
Alp Toker98758b02014-03-02 04:12:06 +00004458#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004459
4460//
4461// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
 4462// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004463// thread's partition, and binds each worker to a place in its partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004464// The master thread's partition should already include its current binding.
4465//
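// In outline (matching the switch below): proc_bind_master binds every worker to the
// master's place; proc_bind_close assigns workers to consecutive places starting from
// the master's place; proc_bind_spread distributes the threads evenly across the
// partition, giving each thread its own sub-partition when places outnumber threads.
//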
4466static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004467__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004468{
4469 //
 4470 // Copy the master thread's place partition to the team struct
4471 //
4472 kmp_info_t *master_th = team->t.t_threads[0];
4473 KMP_DEBUG_ASSERT( master_th != NULL );
4474 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4475 int first_place = master_th->th.th_first_place;
4476 int last_place = master_th->th.th_last_place;
4477 int masters_place = master_th->th.th_current_place;
4478 team->t.t_first_place = first_place;
4479 team->t.t_last_place = last_place;
4480
4481 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4482 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4483 masters_place, first_place, last_place ) );
4484
4485 switch ( proc_bind ) {
4486
4487 case proc_bind_default:
4488 //
4489 // serial teams might have the proc_bind policy set to
4490 // proc_bind_default. It doesn't matter, as we don't
4491 // rebind the master thread for any proc_bind policy.
4492 //
4493 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4494 break;
4495
4496 case proc_bind_master:
4497 {
4498 int f;
4499 int n_th = team->t.t_nproc;
4500 for ( f = 1; f < n_th; f++ ) {
4501 kmp_info_t *th = team->t.t_threads[f];
4502 KMP_DEBUG_ASSERT( th != NULL );
4503 th->th.th_first_place = first_place;
4504 th->th.th_last_place = last_place;
4505 th->th.th_new_place = masters_place;
4506
4507 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4508 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4509 team->t.t_id, f, masters_place, first_place, last_place ) );
4510 }
4511 }
4512 break;
4513
4514 case proc_bind_close:
4515 {
4516 int f;
4517 int n_th = team->t.t_nproc;
4518 int n_places;
4519 if ( first_place <= last_place ) {
4520 n_places = last_place - first_place + 1;
4521 }
4522 else {
4523 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4524 }
4525 if ( n_th <= n_places ) {
4526 int place = masters_place;
4527 for ( f = 1; f < n_th; f++ ) {
4528 kmp_info_t *th = team->t.t_threads[f];
4529 KMP_DEBUG_ASSERT( th != NULL );
4530
4531 if ( place == last_place ) {
4532 place = first_place;
4533 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004534 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004535 place = 0;
4536 }
4537 else {
4538 place++;
4539 }
4540 th->th.th_first_place = first_place;
4541 th->th.th_last_place = last_place;
4542 th->th.th_new_place = place;
4543
4544 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4545 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4546 team->t.t_id, f, place, first_place, last_place ) );
4547 }
4548 }
4549 else {
4550 int S, rem, gap, s_count;
4551 S = n_th / n_places;
4552 s_count = 0;
4553 rem = n_th - ( S * n_places );
4554 gap = rem > 0 ? n_places/rem : n_places;
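                // Illustrative numbers: n_th = 10 threads over n_places = 4 places gives
                // S = 2, rem = 2, gap = 2; walking from the master's place the places
                // receive 3, 2, 3, 2 threads, with an extra thread on every gap-th place.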
4555 int place = masters_place;
4556 int gap_ct = gap;
4557 for ( f = 0; f < n_th; f++ ) {
4558 kmp_info_t *th = team->t.t_threads[f];
4559 KMP_DEBUG_ASSERT( th != NULL );
4560
4561 th->th.th_first_place = first_place;
4562 th->th.th_last_place = last_place;
4563 th->th.th_new_place = place;
4564 s_count++;
4565
4566 if ( (s_count == S) && rem && (gap_ct == gap) ) {
 4567 // do nothing, add an extra thread to this place on the next iteration
4568 }
4569 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4570 // we added an extra thread to this place; move to next place
4571 if ( place == last_place ) {
4572 place = first_place;
4573 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004574 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004575 place = 0;
4576 }
4577 else {
4578 place++;
4579 }
4580 s_count = 0;
4581 gap_ct = 1;
4582 rem--;
4583 }
4584 else if (s_count == S) { // place full; don't add extra
4585 if ( place == last_place ) {
4586 place = first_place;
4587 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004588 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004589 place = 0;
4590 }
4591 else {
4592 place++;
4593 }
4594 gap_ct++;
4595 s_count = 0;
4596 }
4597
4598 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4599 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4600 team->t.t_id, f, th->th.th_new_place, first_place,
4601 last_place ) );
4602 }
4603 KMP_DEBUG_ASSERT( place == masters_place );
4604 }
4605 }
4606 break;
4607
4608 case proc_bind_spread:
4609 {
4610 int f;
4611 int n_th = team->t.t_nproc;
4612 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004613 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004614 if ( first_place <= last_place ) {
4615 n_places = last_place - first_place + 1;
4616 }
4617 else {
4618 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4619 }
4620 if ( n_th <= n_places ) {
4621 int place = masters_place;
4622 int S = n_places/n_th;
4623 int s_count, rem, gap, gap_ct;
4624 rem = n_places - n_th*S;
4625 gap = rem ? n_th/rem : 1;
4626 gap_ct = gap;
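                // Illustrative numbers: n_th = 3 threads over n_places = 8 places gives
                // S = 2, rem = 2, gap = 1; the threads get sub-partitions of 3, 3 and 2
                // consecutive places and are bound to the first place of their sub-partition.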
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004627 thidx = n_th;
4628 if (update_master_only == 1)
4629 thidx = 1;
4630 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004631 kmp_info_t *th = team->t.t_threads[f];
4632 KMP_DEBUG_ASSERT( th != NULL );
4633
4634 th->th.th_first_place = place;
4635 th->th.th_new_place = place;
4636 s_count = 1;
4637 while (s_count < S) {
4638 if ( place == last_place ) {
4639 place = first_place;
4640 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004641 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004642 place = 0;
4643 }
4644 else {
4645 place++;
4646 }
4647 s_count++;
4648 }
4649 if (rem && (gap_ct == gap)) {
4650 if ( place == last_place ) {
4651 place = first_place;
4652 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004653 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004654 place = 0;
4655 }
4656 else {
4657 place++;
4658 }
4659 rem--;
4660 gap_ct = 0;
4661 }
4662 th->th.th_last_place = place;
4663 gap_ct++;
4664
4665 if ( place == last_place ) {
4666 place = first_place;
4667 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004668 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004669 place = 0;
4670 }
4671 else {
4672 place++;
4673 }
4674
4675 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4676 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4677 team->t.t_id, f, th->th.th_new_place,
4678 th->th.th_first_place, th->th.th_last_place ) );
4679 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004680 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004681 }
4682 else {
4683 int S, rem, gap, s_count;
4684 S = n_th / n_places;
4685 s_count = 0;
4686 rem = n_th - ( S * n_places );
4687 gap = rem > 0 ? n_places/rem : n_places;
4688 int place = masters_place;
4689 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004690 thidx = n_th;
4691 if (update_master_only == 1)
4692 thidx = 1;
4693 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004694 kmp_info_t *th = team->t.t_threads[f];
4695 KMP_DEBUG_ASSERT( th != NULL );
4696
4697 th->th.th_first_place = place;
4698 th->th.th_last_place = place;
4699 th->th.th_new_place = place;
4700 s_count++;
4701
4702 if ( (s_count == S) && rem && (gap_ct == gap) ) {
 4703 // do nothing, add an extra thread to this place on the next iteration
4704 }
4705 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4706 // we added an extra thread to this place; move on to next place
4707 if ( place == last_place ) {
4708 place = first_place;
4709 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004710 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004711 place = 0;
4712 }
4713 else {
4714 place++;
4715 }
4716 s_count = 0;
4717 gap_ct = 1;
4718 rem--;
4719 }
4720 else if (s_count == S) { // place is full; don't add extra thread
4721 if ( place == last_place ) {
4722 place = first_place;
4723 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004724 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004725 place = 0;
4726 }
4727 else {
4728 place++;
4729 }
4730 gap_ct++;
4731 s_count = 0;
4732 }
4733
4734 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4735 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4736 team->t.t_id, f, th->th.th_new_place,
4737 th->th.th_first_place, th->th.th_last_place) );
4738 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004739 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004740 }
4741 }
4742 break;
4743
4744 default:
4745 break;
4746 }
4747
4748 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4749}
4750
Alp Toker98758b02014-03-02 04:12:06 +00004751#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004752
4753/* allocate a new team data structure to use. take one off of the free pool if available */
4754kmp_team_t *
4755__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004756#if OMPT_SUPPORT
4757 ompt_parallel_id_t ompt_parallel_id,
4758#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004759#if OMP_40_ENABLED
4760 kmp_proc_bind_t new_proc_bind,
4761#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004762 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004763 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004764{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00004765 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004766 int f;
4767 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004768 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004769 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004770
4771 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4772 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4773 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4774 KMP_MB();
4775
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004776#if KMP_NESTED_HOT_TEAMS
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004777 kmp_hot_team_ptr_t *hot_teams;
4778 if( master ) {
4779 team = master->th.th_team;
4780 level = team->t.t_active_level;
4781 if( master->th.th_teams_microtask ) { // in teams construct?
4782 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4783 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4784 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
4785 ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise
4786 }
4787 }
4788 hot_teams = master->th.th_hot_teams;
4789 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4790 { // hot team has already been allocated for given level
4791 use_hot_team = 1;
4792 } else {
4793 use_hot_team = 0;
4794 }
4795 }
4796#endif
4797 // Optimization to use a "hot" team
4798 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004799 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004800#if KMP_NESTED_HOT_TEAMS
4801 team = hot_teams[level].hot_team;
4802#else
4803 team = root->r.r_hot_team;
4804#endif
4805#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004806 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004807 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4808 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004809 }
4810#endif
4811
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004812 // Has the number of threads changed?
4813 /* Let's assume the most common case is that the number of threads is unchanged, and
4814 put that case first. */
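        // Three cases follow: the hot team already has exactly new_nproc threads (just
        // refresh ICVs and, if needed, places), it has more than new_nproc (free or park
        // the surplus threads), or it has fewer (grow it from reserved hot-team threads
        // and/or newly allocated workers).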
4815 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4816 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004817 // This case can mean that omp_set_num_threads() was called and the hot team size
4818 // was already reduced, so we check the special flag
4819 if ( team->t.t_size_changed == -1 ) {
4820 team->t.t_size_changed = 1;
4821 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004822 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004823 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004824
4825 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004826 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004827 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4828 team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004829 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004830
4831 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4832
4833 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4834 0, team->t.t_threads[0], team ) );
4835 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4836
4837#if OMP_40_ENABLED
4838# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004839 if ( ( team->t.t_size_changed == 0 )
4840 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004841 if (new_proc_bind == proc_bind_spread) {
4842 __kmp_partition_places(team, 1); // add flag to update only master for spread
4843 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004844 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4845 team->t.t_id, new_proc_bind, team->t.t_first_place,
4846 team->t.t_last_place ) );
4847 }
4848 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004849 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004850 __kmp_partition_places( team );
4851 }
4852# else
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004853 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004854# endif /* KMP_AFFINITY_SUPPORTED */
4855#endif /* OMP_40_ENABLED */
4856 }
4857 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004858 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4859
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004860 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004861#if KMP_NESTED_HOT_TEAMS
4862 if( __kmp_hot_teams_mode == 0 ) {
4863 // AC: saved number of threads should correspond to team's value in this mode,
 4864 // it can be bigger in mode 1, when the hot team has some threads in reserve
4865 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4866 hot_teams[level].hot_team_nth = new_nproc;
4867#endif // KMP_NESTED_HOT_TEAMS
4868 /* release the extra threads we don't need any more */
4869 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4870 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004871 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4872 // When decreasing team size, threads no longer in the team should unref task team.
4873 team->t.t_threads[f]->th.th_task_team = NULL;
4874 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004875 __kmp_free_thread( team->t.t_threads[ f ] );
4876 team->t.t_threads[ f ] = NULL;
4877 }
4878#if KMP_NESTED_HOT_TEAMS
4879 } // (__kmp_hot_teams_mode == 0)
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004880 else {
4881 // When keeping extra threads in team, switch threads to wait on own b_go flag
4882 for (f=new_nproc; f<team->t.t_nproc; ++f) {
4883 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4884 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4885 for (int b=0; b<bs_last_barrier; ++b) {
4886 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4887 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4888 }
4889 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4890 }
4891 }
4892 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004893#endif // KMP_NESTED_HOT_TEAMS
4894 team->t.t_nproc = new_nproc;
4895 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004896 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4897 team->t.t_sched.chunk != new_icvs->sched.chunk)
4898 team->t.t_sched = new_icvs->sched;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004899 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004900
Jim Cownie5e8470a2013-09-27 10:38:44 +00004901 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004902 for(f = 0; f < new_nproc; ++f) {
4903 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004904 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004905 // restore the current task state of the master thread: should be the implicit task
4906 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4907 0, team->t.t_threads[0], team ) );
4908
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004909 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004910
4911#ifdef KMP_DEBUG
4912 for ( f = 0; f < team->t.t_nproc; f++ ) {
4913 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4914 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4915 }
4916#endif
4917
4918#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004919 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00004920# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004921 __kmp_partition_places( team );
4922# endif
4923#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004924 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004925 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004926#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004927 kmp_affin_mask_t *old_mask;
4928 if ( KMP_AFFINITY_CAPABLE() ) {
4929 KMP_CPU_ALLOC(old_mask);
4930 }
4931#endif
4932
4933 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4934
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004935 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004936
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004937#if KMP_NESTED_HOT_TEAMS
4938 int avail_threads = hot_teams[level].hot_team_nth;
4939 if( new_nproc < avail_threads )
4940 avail_threads = new_nproc;
4941 kmp_info_t **other_threads = team->t.t_threads;
4942 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4943 // Adjust barrier data of reserved threads (if any) of the team
4944 // Other data will be set in __kmp_initialize_info() below.
4945 int b;
4946 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4947 for ( b = 0; b < bs_last_barrier; ++ b ) {
4948 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4949 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004950#if USE_DEBUGGER
4951 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4952#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004953 }
4954 }
4955 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4956 // we have all needed threads in reserve, no need to allocate any
 4957 // this is only possible in mode 1, as mode 0 cannot have reserved threads
4958 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4959 team->t.t_nproc = new_nproc; // just get reserved threads involved
4960 } else {
4961 // we may have some threads in reserve, but not enough
4962 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4963 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4964#endif // KMP_NESTED_HOT_TEAMS
4965 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004966 /* reallocate larger arrays */
4967 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004968 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004969 }
4970
Alp Toker98758b02014-03-02 04:12:06 +00004971#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004972 /* Temporarily set full mask for master thread before
4973 creation of workers. The reason is that workers inherit
 4974 the affinity from the master, so if many workers are
 4975 created on the master's core in quick succession, they don't get
4976 a chance to set their own affinity for a long time.
4977 */
4978 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4979#endif
4980
4981 /* allocate new threads for the hot team */
4982 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4983 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4984 KMP_DEBUG_ASSERT( new_worker );
4985 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004986
Jonathan Peytond26e2132015-09-10 18:44:30 +00004987 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004988 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4989 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4990 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4991
4992 { // Initialize barrier data for new threads.
4993 int b;
4994 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004995 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004996 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004997 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004998#if USE_DEBUGGER
4999 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5000#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005001 }
5002 }
5003 }
5004
Alp Toker98758b02014-03-02 04:12:06 +00005005#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005006 if ( KMP_AFFINITY_CAPABLE() ) {
5007 /* Restore initial master thread's affinity mask */
5008 __kmp_set_system_affinity( old_mask, TRUE );
5009 KMP_CPU_FREE(old_mask);
5010 }
5011#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005012#if KMP_NESTED_HOT_TEAMS
5013 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5014#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005015 /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00005016 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005017 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005018
Jonathan Peytone03b62f2015-10-08 18:49:40 +00005019 /* reinitialize the threads */
5020 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00005021 for (f=0; f < team->t.t_nproc; ++f)
5022 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5023 if (level) { // set th_task_state for new threads in nested hot team
5024 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00005025 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00005026 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
5027 for (f=old_nproc; f < team->t.t_nproc; ++f)
5028 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005029 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005030 else { // set th_task_state for new threads in non-nested hot team
5031 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
5032 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005033 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005034 }
5035
Jim Cownie5e8470a2013-09-27 10:38:44 +00005036#ifdef KMP_DEBUG
5037 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5038 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5039 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5040 }
5041#endif
5042
5043#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00005044 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00005045# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005046 __kmp_partition_places( team );
5047# endif
5048#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005049 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005050
5051#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005052 kmp_info_t *master = team->t.t_threads[0];
5053 if( master->th.th_teams_microtask ) {
5054 for( f = 1; f < new_nproc; ++f ) {
5055 // propagate teams construct specific info to workers
5056 kmp_info_t *thr = team->t.t_threads[f];
5057 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5058 thr->th.th_teams_level = master->th.th_teams_level;
5059 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005060 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005061 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005062#endif /* OMP_40_ENABLED */
5063#if KMP_NESTED_HOT_TEAMS
5064 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005065 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005066 for( f = 1; f < new_nproc; ++f ) {
5067 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005068 int b;
5069 kmp_balign_t * balign = thr->th.th_bar;
5070 for( b = 0; b < bs_last_barrier; ++ b ) {
5071 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5072 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005073#if USE_DEBUGGER
5074 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5075#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005076 }
5077 }
5078 }
5079#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005080
5081 /* reallocate space for arguments if necessary */
5082 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005083 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005084 //
5085 // The hot team re-uses the previous task team,
5086 // if untouched during the previous release->gather phase.
5087 //
5088
5089 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5090
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005091#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005092 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005093 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5094 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005095 }
5096#endif
5097
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005098#if OMPT_SUPPORT
5099 __ompt_team_assign_id(team, ompt_parallel_id);
5100#endif
5101
Jim Cownie5e8470a2013-09-27 10:38:44 +00005102 KMP_MB();
5103
5104 return team;
5105 }
5106
5107 /* next, let's try to take one from the team pool */
5108 KMP_MB();
5109 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5110 {
5111 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5112 if ( team->t.t_max_nproc >= max_nproc ) {
5113 /* take this team from the team pool */
5114 __kmp_team_pool = team->t.t_next_pool;
5115
5116 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005117 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005118
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005119 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5120 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5121 team->t.t_task_team[0] = NULL;
5122 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005123
5124 /* reallocate space for arguments if necessary */
5125 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005126 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005127
5128 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5129 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5130 { // Initialize barrier data.
5131 int b;
5132 for ( b = 0; b < bs_last_barrier; ++ b) {
5133 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005134#if USE_DEBUGGER
5135 team->t.t_bar[ b ].b_master_arrived = 0;
5136 team->t.t_bar[ b ].b_team_arrived = 0;
5137#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005138 }
5139 }
5140
5141#if OMP_40_ENABLED
5142 team->t.t_proc_bind = new_proc_bind;
5143#endif
5144
5145 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005146
5147#if OMPT_SUPPORT
5148 __ompt_team_assign_id(team, ompt_parallel_id);
5149#endif
5150
Jim Cownie5e8470a2013-09-27 10:38:44 +00005151 KMP_MB();
5152
5153 return team;
5154 }
5155
5156 /* reap team if it is too small, then loop back and check the next one */
5157 /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
5158 /* TODO: Use technique to find the right size hot-team, don't reap them */
5159 team = __kmp_reap_team( team );
5160 __kmp_team_pool = team;
5161 }
5162
5163 /* nothing available in the pool, no matter, make a new team! */
5164 KMP_MB();
5165 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5166
5167 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005168 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005169 /* NOTE well: for some reason, allocating one big buffer and dividing it
5170 * up seems to hurt performance a lot on the P4, so let's not use
5171 * this... */
5172 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005173
5174 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005175 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005176
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005177 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5178 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5179 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5180 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005181
5182 if ( __kmp_storage_map ) {
5183 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5184 }
5185
5186 /* allocate space for arguments */
5187 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005188 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005189
5190 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5191 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5192 { // Initialize barrier data.
5193 int b;
5194 for ( b = 0; b < bs_last_barrier; ++ b ) {
5195 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005196#if USE_DEBUGGER
5197 team->t.t_bar[ b ].b_master_arrived = 0;
5198 team->t.t_bar[ b ].b_team_arrived = 0;
5199#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005200 }
5201 }
5202
5203#if OMP_40_ENABLED
5204 team->t.t_proc_bind = new_proc_bind;
5205#endif
5206
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005207#if OMPT_SUPPORT
5208 __ompt_team_assign_id(team, ompt_parallel_id);
5209 team->t.ompt_serialized_team_info = NULL;
5210#endif
5211
Jim Cownie5e8470a2013-09-27 10:38:44 +00005212 KMP_MB();
5213
5214 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5215
5216 return team;
5217}
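// Editorial sketch (not part of the runtime): the nested-hot-team growth
// decision above reduces to "re-activate reserved threads first, allocate
// only the remainder". The struct and helper below are simplified stand-ins
// for the team/hot-team bookkeeping, not real libomp types, and the block is
// compiled out on purpose.
#if 0
struct sketch_hot_team {
    int t_nproc;        // threads currently active in the team
    int hot_team_nth;   // threads kept in reserve at this nesting level
};

// Returns how many brand-new workers still have to be allocated to reach
// new_nproc; reserved threads are re-activated first, and the reserve
// ceiling grows when the request exceeds it (mirroring the code above).
static int sketch_hot_team_grow(struct sketch_hot_team *ht, int new_nproc) {
    if (ht->hot_team_nth >= new_nproc) {
        ht->t_nproc = new_nproc;        // reserve alone covers the request
        return 0;
    }
    ht->t_nproc = ht->hot_team_nth;     // involve every reserved thread
    ht->hot_team_nth = new_nproc;       // remember the new maximum
    return new_nproc - ht->t_nproc;     // remainder needs fresh allocation
}
#endif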
5218
5219/* TODO implement hot-teams at all levels */
5220/* TODO implement lazy thread release on demand (disband request) */
5221
5222/* free the team. return it to the team pool. release all the threads
5223 * associated with it */
5224void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005225__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005226{
5227 int f;
5228 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5229
5230 /* verify state */
5231 KMP_DEBUG_ASSERT( root );
5232 KMP_DEBUG_ASSERT( team );
5233 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5234 KMP_DEBUG_ASSERT( team->t.t_threads );
5235
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005236 int use_hot_team = team == root->r.r_hot_team;
5237#if KMP_NESTED_HOT_TEAMS
5238 int level;
5239 kmp_hot_team_ptr_t *hot_teams;
5240 if( master ) {
5241 level = team->t.t_active_level - 1;
5242 if( master->th.th_teams_microtask ) { // in teams construct?
5243 if( master->th.th_teams_size.nteams > 1 ) {
5244 ++level; // level was not increased in teams construct for team_of_masters
5245 }
5246 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5247 master->th.th_teams_level == team->t.t_level ) {
5248 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5249 } // team->t.t_level will be increased inside parallel
5250 }
5251 hot_teams = master->th.th_hot_teams;
5252 if( level < __kmp_hot_teams_max_level ) {
5253 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5254 use_hot_team = 1;
5255 }
5256 }
5257#endif // KMP_NESTED_HOT_TEAMS
5258
Jim Cownie5e8470a2013-09-27 10:38:44 +00005259 /* team is done working */
5260 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005261 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005262 // Do not reset pointer to parent team to NULL for hot teams.
5263
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005264 /* if we are non-hot team, release our threads */
5265 if( ! use_hot_team ) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005266 if (__kmp_tasking_mode != tskm_immediate_exec) {
5267 // Wait for threads to reach reapable state
5268 for (f = 1; f < team->t.t_nproc; ++f) {
5269 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5270 volatile kmp_uint32 *state = &team->t.t_threads[f]->th.th_reap_state;
5271 while (*state != KMP_SAFE_TO_REAP) {
5272#if KMP_OS_WINDOWS
5273 // On Windows a thread can be killed at any time, check this
5274 DWORD ecode;
5275 if (__kmp_is_thread_alive(team->t.t_threads[f], &ecode))
5276 KMP_CPU_PAUSE();
5277 else
5278 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5279#else
5280 KMP_CPU_PAUSE();
5281#endif
5282 }
5283 }
5284
Jonathan Peyton54127982015-11-04 21:37:48 +00005285 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005286 int tt_idx;
5287 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005288 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5289 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005290 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5291 team->t.t_threads[f]->th.th_task_team = NULL;
5292 }
5293 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005294#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005295 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005296#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005297 team->t.t_task_team[tt_idx] = NULL;
5298 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005299 }
5300 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005301
5302 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005303 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005304 team->t.t_level = 0;
5305 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005306
Jim Cownie5e8470a2013-09-27 10:38:44 +00005307 /* free the worker threads */
5308 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5309 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5310 __kmp_free_thread( team->t.t_threads[ f ] );
5311 team->t.t_threads[ f ] = NULL;
5312 }
5313
Jim Cownie5e8470a2013-09-27 10:38:44 +00005314 /* put the team back in the team pool */
5315 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005316 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005317 __kmp_team_pool = (volatile kmp_team_t*) team;
5318 }
5319
5320 KMP_MB();
5321}
5322
5323
5324/* reap the team. destroy it, reclaim all its resources and free its memory */
5325kmp_team_t *
5326__kmp_reap_team( kmp_team_t *team )
5327{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005328 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005329
5330 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005331 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5332 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5333 KMP_DEBUG_ASSERT( team->t.t_threads );
5334 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005335
5336 /* TODO clean the threads that are a part of this? */
5337
5338 /* free stuff */
5339
5340 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005341 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5342 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005343 __kmp_free( team );
5344
5345 KMP_MB();
5346 return next_pool;
5347}
5348
5349//
5350// Free the thread. Don't reap it, just place it on the pool of available
5351// threads.
5352//
5353// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5354// binding for the affinity mechanism to be useful.
5355//
5356// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5357// However, we want to avoid a potential performance problem by always
5358// scanning through the list to find the correct point at which to insert
5359// the thread (potential N**2 behavior). To do this we keep track of the
5360// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5361// With single-level parallelism, threads will always be added to the tail
5362// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5363// parallelism, all bets are off and we may need to scan through the entire
5364// free list.
5365//
5366// This change also has a potentially large performance benefit, for some
5367// applications. Previously, as threads were freed from the hot team, they
5368// would be placed back on the free list in inverse order. If the hot team
5369// grew back to its original size, then the freed thread would be placed
5370// back on the hot team in reverse order. This could cause bad cache
5371// locality problems on programs where the size of the hot team regularly
5372// grew and shrunk.
5373//
5374// Now, for single-level parallelism, the OMP tid is always == gtid.
5375//
5376void
5377__kmp_free_thread( kmp_info_t *this_th )
5378{
5379 int gtid;
5380 kmp_info_t **scan;
5381
5382 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5383 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5384
5385 KMP_DEBUG_ASSERT( this_th );
5386
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005387 // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team).
5388 int b;
5389 kmp_balign_t *balign = this_th->th.th_bar;
5390 for (b=0; b<bs_last_barrier; ++b) {
5391 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5392 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5393 balign[b].bb.team = NULL;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005394 balign[b].bb.leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005395 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005396 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005397
Jim Cownie5e8470a2013-09-27 10:38:44 +00005398 /* put thread back on the free pool */
5399 TCW_PTR(this_th->th.th_team, NULL);
5400 TCW_PTR(this_th->th.th_root, NULL);
5401 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5402
5403 //
5404 // If the __kmp_thread_pool_insert_pt is already past the new insert
5405 // point, then we need to re-scan the entire list.
5406 //
5407 gtid = this_th->th.th_info.ds.ds_gtid;
5408 if ( __kmp_thread_pool_insert_pt != NULL ) {
5409 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5410 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5411 __kmp_thread_pool_insert_pt = NULL;
5412 }
5413 }
5414
5415 //
5416 // Scan down the list to find the place to insert the thread.
5417 // scan is the address of a link in the list, possibly the address of
5418 // __kmp_thread_pool itself.
5419 //
5420// In the absence of nested parallelism, the for loop will have 0 iterations.
5421 //
5422 if ( __kmp_thread_pool_insert_pt != NULL ) {
5423 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5424 }
5425 else {
5426 scan = (kmp_info_t **)&__kmp_thread_pool;
5427 }
5428 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5429 scan = &( (*scan)->th.th_next_pool ) );
5430
5431 //
5432 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5433 // to its address.
5434 //
5435 TCW_PTR(this_th->th.th_next_pool, *scan);
5436 __kmp_thread_pool_insert_pt = *scan = this_th;
5437 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5438 || ( this_th->th.th_info.ds.ds_gtid
5439 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5440 TCW_4(this_th->th.th_in_pool, TRUE);
5441 __kmp_thread_pool_nth++;
5442
5443 TCW_4(__kmp_nth, __kmp_nth - 1);
5444
5445#ifdef KMP_ADJUST_BLOCKTIME
5446 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005447 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005448 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5449 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5450 if ( __kmp_nth <= __kmp_avail_proc ) {
5451 __kmp_zero_bt = FALSE;
5452 }
5453 }
5454#endif /* KMP_ADJUST_BLOCKTIME */
5455
5456 KMP_MB();
5457}
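// Editorial sketch (not part of the runtime): the gtid-sorted free-list
// insertion described in the comment above __kmp_free_thread, reduced to a
// self-contained singly linked list of ints. "sketch_hint" plays the role of
// __kmp_thread_pool_insert_pt; the block is compiled out on purpose.
#if 0
struct sketch_node {
    int gtid;
    struct sketch_node *next;
};

static struct sketch_node *sketch_pool = NULL;   // stand-in for __kmp_thread_pool
static struct sketch_node *sketch_hint = NULL;   // stand-in for the insert point

static void sketch_insert_sorted(struct sketch_node *n) {
    // If the cached insert point is already past the new gtid, the hint is
    // useless and we fall back to scanning from the head of the list.
    if (sketch_hint != NULL && sketch_hint->gtid > n->gtid)
        sketch_hint = NULL;
    struct sketch_node **scan =
        (sketch_hint != NULL) ? &sketch_hint->next : &sketch_pool;
    // With single-level parallelism the hint is the tail and this loop runs
    // zero iterations; nested parallelism may force a full scan.
    while (*scan != NULL && (*scan)->gtid < n->gtid)
        scan = &(*scan)->next;
    n->next = *scan;       // splice in, keeping the list sorted by gtid
    *scan = n;
    sketch_hint = n;       // remember where we inserted for next time
}
#endif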
5458
Jim Cownie5e8470a2013-09-27 10:38:44 +00005459
Jim Cownie5e8470a2013-09-27 10:38:44 +00005460/* ------------------------------------------------------------------------ */
5461
5462void *
5463__kmp_launch_thread( kmp_info_t *this_thr )
5464{
5465 int gtid = this_thr->th.th_info.ds.ds_gtid;
5466/* void *stack_data;*/
5467 kmp_team_t *(*volatile pteam);
5468
5469 KMP_MB();
5470 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5471
5472 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005473 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005474 }
5475
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005476#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005477 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005478 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5479 this_thr->th.ompt_thread_info.wait_id = 0;
5480 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005481 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005482 __ompt_thread_begin(ompt_thread_worker, gtid);
5483 }
5484 }
5485#endif
5486
Jim Cownie5e8470a2013-09-27 10:38:44 +00005487 /* This is the place where threads wait for work */
5488 while( ! TCR_4(__kmp_global.g.g_done) ) {
5489 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5490 KMP_MB();
5491
5492 /* wait for work to do */
5493 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5494
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005495#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005496 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005497 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5498 }
5499#endif
5500
Jim Cownie5e8470a2013-09-27 10:38:44 +00005501 /* No tid yet since not part of a team */
5502 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5503
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005504#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005505 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005506 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5507 }
5508#endif
5509
Jim Cownie5e8470a2013-09-27 10:38:44 +00005510 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5511
5512 /* have we been allocated? */
5513 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005514#if OMPT_SUPPORT
5515 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005516 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005517 if (ompt_enabled) {
5518 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005519 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005520 }
5521#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005522 /* we were just woken up, so run our new task */
5523 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5524 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005525 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5526 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005527
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005528 updateHWFPControl (*pteam);
5529
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005530#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005531 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005532 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005533 // Initialize OMPT task id for implicit task.
5534 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005535 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005536 }
5537#endif
5538
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005539 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005540 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5541 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005542 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005543 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005544 KMP_ASSERT( rc );
5545
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005546#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005547 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005548 /* no frame set while outside task */
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005549 task_info->frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005550
5551 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5552 }
5553#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005554 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005555 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5556 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005557 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005558 /* join barrier after parallel region */
5559 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005560#if OMPT_SUPPORT && OMPT_TRACE
5561 if (ompt_enabled) {
5562 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005563 // don't access *pteam here: it may have already been freed
5564 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005565 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5566 my_parallel_id, task_info->task_id);
5567 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005568 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005569 task_info->task_id = 0;
5570 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005571#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005572 }
5573 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005574 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005575
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005576#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005577 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005578 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5579 __ompt_thread_end(ompt_thread_worker, gtid);
5580 }
5581#endif
5582
Jonathan Peyton54127982015-11-04 21:37:48 +00005583 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005584 /* run the destructors for the threadprivate data for this thread */
5585 __kmp_common_destroy_gtid( gtid );
5586
5587 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5588 KMP_MB();
5589 return this_thr;
5590}
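// Editorial sketch (not part of the runtime): the worker life cycle that
// __kmp_launch_thread implements, reduced to its essential control flow.
// Every sketch_* name is a stand-in, the barrier placeholders do nothing
// here, and the block is compiled out on purpose.
#if 0
typedef void (*sketch_microtask_t)(int gtid);

static void sketch_fork_barrier(int gtid) { (void)gtid; }  // placeholder
static void sketch_join_barrier(int gtid) { (void)gtid; }  // placeholder

static volatile int sketch_done = 0;                    // stand-in for g_done
static sketch_microtask_t volatile sketch_task = NULL;  // published at fork time

static void sketch_worker_loop(int gtid) {
    while (!sketch_done) {
        sketch_fork_barrier(gtid);        // sleep here until the master forks
        sketch_microtask_t fn = sketch_task;
        if (fn != NULL && !sketch_done) {
            fn(gtid);                     // run the parallel-region body
            sketch_join_barrier(gtid);    // rendezvous before the next region
        }
    }
}
#endif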
5591
5592/* ------------------------------------------------------------------------ */
5593/* ------------------------------------------------------------------------ */
5594
Jim Cownie5e8470a2013-09-27 10:38:44 +00005595void
5596__kmp_internal_end_dest( void *specific_gtid )
5597{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005598 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005599 #pragma warning( push )
5600 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5601 #endif
5602 // Make sure no significant bits are lost
5603 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005604 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005605 #pragma warning( pop )
5606 #endif
5607
5608 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5609 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
5610 * this is because 0 is reserved for the nothing-stored case */
5611
5612 /* josh: One reason for setting the gtid specific data even when it is being
5613 destroyed by pthread is to allow gtid lookup through thread specific data
5614 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5615 that gets executed in the call to __kmp_internal_end_thread, actually
5616 gets the gtid through the thread specific data. Setting it here seems
5617 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5618 to run smoothly.
5619 todo: get rid of this after we remove the dependence on
5620 __kmp_gtid_get_specific
5621 */
5622 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5623 __kmp_gtid_set_specific( gtid );
5624 #ifdef KMP_TDATA_GTID
5625 __kmp_gtid = gtid;
5626 #endif
5627 __kmp_internal_end_thread( gtid );
5628}
5629
Jonathan Peyton99016992015-05-26 17:32:53 +00005630#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005631
5632// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005633// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005634// option in makefile.mk works fine.
5635
5636__attribute__(( destructor ))
5637void
5638__kmp_internal_end_dtor( void )
5639{
5640 __kmp_internal_end_atexit();
5641}
5642
5643void
5644__kmp_internal_end_fini( void )
5645{
5646 __kmp_internal_end_atexit();
5647}
5648
5649#endif
5650
5651/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5652void
5653__kmp_internal_end_atexit( void )
5654{
5655 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5656 /* [Windows]
5657 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5658 stat code that depends on thread specific data for gtid fails because that data becomes
5659 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5660 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5661 stat code and use __kmp_internal_end_library to cleanly shutdown the library.
5662
5663// TODO: Can some of this comment about GVS be removed?
5664 I suspect that the offending stat code is executed when the calling thread tries to
5665 clean up a dead root thread's data structures, resulting in GVS code trying to close
5666 the GVS structures for that thread, but since the stat code uses
5667 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5668 cleaning up itself instead of another thread, it gets confused. This happens because
5669 allowing a thread to unregister and cleanup another thread is a recent modification for
5670 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5671 thread may end up trying to unregister another thread only if thread death does not
5672 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5673 specific data destructor function to detect thread death. For Windows dynamic, there
5674 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5675 workaround is applicable only for Windows static stat library.
5676 */
5677 __kmp_internal_end_library( -1 );
5678 #if KMP_OS_WINDOWS
5679 __kmp_close_console();
5680 #endif
5681}
5682
5683static void
5684__kmp_reap_thread(
5685 kmp_info_t * thread,
5686 int is_root
5687) {
5688
Alp Toker8f2d3f02014-02-24 10:40:15 +00005689 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005690
5691 int gtid;
5692
5693 KMP_DEBUG_ASSERT( thread != NULL );
5694
5695 gtid = thread->th.th_info.ds.ds_gtid;
5696
5697 if ( ! is_root ) {
5698
5699 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5700 /* Assume the threads are at the fork barrier here */
5701 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5702 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00005703 ANNOTATE_HAPPENS_BEFORE(thread);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005704 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5705 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005706 }; // if
5707
Jim Cownie5e8470a2013-09-27 10:38:44 +00005708 // Terminate OS thread.
5709 __kmp_reap_worker( thread );
5710
5711 //
5712 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005713 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005714 //
5715 // There is a small timing hole here - if the worker thread was
5716 // just waking up after sleeping in the pool, had reset its
5717 // th_active_in_pool flag but not decremented the global counter
5718 // __kmp_thread_pool_active_nth yet, then the global counter
5719 // might not get updated.
5720 //
5721 // Currently, this can only happen as the library is unloaded,
5722 // so there are no harmful side effects.
5723 //
5724 if ( thread->th.th_active_in_pool ) {
5725 thread->th.th_active_in_pool = FALSE;
5726 KMP_TEST_THEN_DEC32(
5727 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5728 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5729 }
5730
5731 // Decrement # of [worker] threads in the pool.
5732 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5733 --__kmp_thread_pool_nth;
5734 }; // if
5735
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005736 __kmp_free_implicit_task(thread);
5737
Jim Cownie5e8470a2013-09-27 10:38:44 +00005738 // Free the fast memory for tasking
5739 #if USE_FAST_MEMORY
5740 __kmp_free_fast_memory( thread );
5741 #endif /* USE_FAST_MEMORY */
5742
5743 __kmp_suspend_uninitialize_thread( thread );
5744
5745 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5746 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5747
5748 -- __kmp_all_nth;
5749 // __kmp_nth was decremented when thread is added to the pool.
5750
5751#ifdef KMP_ADJUST_BLOCKTIME
5752 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005753 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005754 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5755 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5756 if ( __kmp_nth <= __kmp_avail_proc ) {
5757 __kmp_zero_bt = FALSE;
5758 }
5759 }
5760#endif /* KMP_ADJUST_BLOCKTIME */
5761
5762 /* free the memory being used */
5763 if( __kmp_env_consistency_check ) {
5764 if ( thread->th.th_cons ) {
5765 __kmp_free_cons_stack( thread->th.th_cons );
5766 thread->th.th_cons = NULL;
5767 }; // if
5768 }
5769
5770 if ( thread->th.th_pri_common != NULL ) {
5771 __kmp_free( thread->th.th_pri_common );
5772 thread->th.th_pri_common = NULL;
5773 }; // if
5774
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005775 if (thread->th.th_task_state_memo_stack != NULL) {
5776 __kmp_free(thread->th.th_task_state_memo_stack);
5777 thread->th.th_task_state_memo_stack = NULL;
5778 }
5779
Jim Cownie5e8470a2013-09-27 10:38:44 +00005780 #if KMP_USE_BGET
5781 if ( thread->th.th_local.bget_data != NULL ) {
5782 __kmp_finalize_bget( thread );
5783 }; // if
5784 #endif
5785
Alp Toker98758b02014-03-02 04:12:06 +00005786#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005787 if ( thread->th.th_affin_mask != NULL ) {
5788 KMP_CPU_FREE( thread->th.th_affin_mask );
5789 thread->th.th_affin_mask = NULL;
5790 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005791#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005792
5793 __kmp_reap_team( thread->th.th_serial_team );
5794 thread->th.th_serial_team = NULL;
5795 __kmp_free( thread );
5796
5797 KMP_MB();
5798
5799} // __kmp_reap_thread
5800
5801static void
5802__kmp_internal_end(void)
5803{
5804 int i;
5805
5806 /* First, unregister the library */
5807 __kmp_unregister_library();
5808
5809 #if KMP_OS_WINDOWS
5810 /* In Win static library, we can't tell when a root actually dies, so we
5811 reclaim the data structures for any root threads that have died but not
5812 unregistered themselves, in order to shut down cleanly.
5813 In Win dynamic library we also can't tell when a thread dies.
5814 */
5815 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5816 #endif
5817
5818 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5819 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005820 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005821 break;
5822 KMP_MB(); /* Flush all pending memory write invalidates. */
5823 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5824
5825 if ( i < __kmp_threads_capacity ) {
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005826#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005827 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5828 KMP_MB(); /* Flush all pending memory write invalidates. */
5829
5830 //
5831 // Need to check that monitor was initialized before reaping it.
5832 // If we are called form __kmp_atfork_child (which sets
5833 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5834 // contain valid data, but it is only valid in the parent process,
5835 // not the child.
5836 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005837 // New behavior (201008): instead of keying off of the flag
5838 // __kmp_init_parallel, the monitor thread creation is keyed off
5839 // of the new flag __kmp_init_monitor.
5840 //
5841 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5842 if ( TCR_4( __kmp_init_monitor ) ) {
5843 __kmp_reap_monitor( & __kmp_monitor );
5844 TCW_4( __kmp_init_monitor, 0 );
5845 }
5846 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5847 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005848#endif // KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005849 } else {
5850 /* TODO move this to cleanup code */
5851 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005852 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005853 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5854 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005855// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005856 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005857 }
5858 }
5859 #endif
5860
5861 KMP_MB();
5862
5863 // Reap the worker threads.
5864 // This is valid for now, but be careful if threads are reaped sooner.
5865 while ( __kmp_thread_pool != NULL ) { // Loop thru all the threads in the pool.
5866 // Get the next thread from the pool.
5867 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5868 __kmp_thread_pool = thread->th.th_next_pool;
5869 // Reap it.
Andrey Churbanov581490e2017-02-06 18:53:32 +00005870 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005871 thread->th.th_next_pool = NULL;
5872 thread->th.th_in_pool = FALSE;
5873 __kmp_reap_thread( thread, 0 );
5874 }; // while
5875 __kmp_thread_pool_insert_pt = NULL;
5876
5877 // Reap teams.
5878 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
5879 // Get the next team from the pool.
5880 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5881 __kmp_team_pool = team->t.t_next_pool;
5882 // Reap it.
5883 team->t.t_next_pool = NULL;
5884 __kmp_reap_team( team );
5885 }; // while
5886
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005887 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005888
5889 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5890 // TBD: Add some checking...
5891 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5892 }
5893
5894 /* Make sure all threadprivate destructors get run by joining with all worker
5895 threads before resetting this flag */
5896 TCW_SYNC_4(__kmp_init_common, FALSE);
5897
5898 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5899 KMP_MB();
5900
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005901#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005902 //
5903 // See note above: One of the possible fixes for CQ138434 / CQ140126
5904 //
5905 // FIXME: push both code fragments down and CSE them?
5906 // push them into __kmp_cleanup() ?
5907 //
5908 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5909 if ( TCR_4( __kmp_init_monitor ) ) {
5910 __kmp_reap_monitor( & __kmp_monitor );
5911 TCW_4( __kmp_init_monitor, 0 );
5912 }
5913 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5914 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005915#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005916 } /* else !__kmp_global.t_active */
5917 TCW_4(__kmp_init_gtid, FALSE);
5918 KMP_MB(); /* Flush all pending memory write invalidates. */
5919
Jim Cownie5e8470a2013-09-27 10:38:44 +00005920 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005921#if OMPT_SUPPORT
5922 ompt_fini();
5923#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005924}
5925
5926void
5927__kmp_internal_end_library( int gtid_req )
5928{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005929 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5930 /* this shouldn't be a race condition because __kmp_internal_end() is the
5931 * only place to clear __kmp_serial_init */
5932 /* we'll check this later too, after we get the lock */
5933 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5934 // because the next check will work in any case.
5935 if( __kmp_global.g.g_abort ) {
5936 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5937 /* TODO abort? */
5938 return;
5939 }
5940 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5941 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5942 return;
5943 }
5944
5945
5946 KMP_MB(); /* Flush all pending memory write invalidates. */
5947
5948 /* find out who we are and what we should do */
5949 {
5950 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5951 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5952 if( gtid == KMP_GTID_SHUTDOWN ) {
5953 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5954 return;
5955 } else if( gtid == KMP_GTID_MONITOR ) {
5956 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5957 return;
5958 } else if( gtid == KMP_GTID_DNE ) {
5959 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5960 /* we don't know who we are, but we may still shutdown the library */
5961 } else if( KMP_UBER_GTID( gtid )) {
5962 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005963 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005964 __kmp_global.g.g_abort = -1;
5965 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5966 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5967 return;
5968 } else {
5969 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5970 __kmp_unregister_root_current_thread( gtid );
5971 }
5972 } else {
5973 /* worker threads may call this function through the atexit handler, if they call exit() */
5974 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5975 TODO: do a thorough shutdown instead
5976 */
5977 #ifdef DUMP_DEBUG_ON_EXIT
5978 if ( __kmp_debug_buf )
5979 __kmp_dump_debug_buffer( );
5980 #endif
5981 return;
5982 }
5983 }
5984 /* synchronize the termination process */
5985 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5986
5987 /* have we already finished */
5988 if( __kmp_global.g.g_abort ) {
5989 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5990 /* TODO abort? */
5991 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5992 return;
5993 }
5994 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5995 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5996 return;
5997 }
5998
5999 /* We need this lock to enforce mutual exclusion between this reading of
6000 __kmp_threads_capacity and the writing by __kmp_register_root.
6001 Alternatively, we can use a counter of roots that is
6002 atomically updated by __kmp_get_global_thread_id_reg,
6003 __kmp_do_serial_initialize and __kmp_internal_end_*.
6004 */
6005 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6006
6007 /* now we can safely conduct the actual termination */
6008 __kmp_internal_end();
6009
6010 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6011 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6012
6013 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
6014
6015 #ifdef DUMP_DEBUG_ON_EXIT
6016 if ( __kmp_debug_buf )
6017 __kmp_dump_debug_buffer();
6018 #endif
6019
6020 #if KMP_OS_WINDOWS
6021 __kmp_close_console();
6022 #endif
6023
6024 __kmp_fini_allocator();
6025
6026} // __kmp_internal_end_library
6027
6028void
6029__kmp_internal_end_thread( int gtid_req )
6030{
6031 int i;
6032
6033 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6034 /* this shouldn't be a race condition because __kmp_internal_end() is the
6035 * only place to clear __kmp_serial_init */
6036 /* we'll check this later too, after we get the lock */
6037 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
6038 // because the next check will work in any case.
6039 if( __kmp_global.g.g_abort ) {
6040 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
6041 /* TODO abort? */
6042 return;
6043 }
6044 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6045 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
6046 return;
6047 }
6048
6049 KMP_MB(); /* Flush all pending memory write invalidates. */
6050
6051 /* find out who we are and what we should do */
6052 {
6053 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6054 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6055 if( gtid == KMP_GTID_SHUTDOWN ) {
6056 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6057 return;
6058 } else if( gtid == KMP_GTID_MONITOR ) {
6059 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6060 return;
6061 } else if( gtid == KMP_GTID_DNE ) {
6062 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6063 return;
6064 /* we don't know who we are */
6065 } else if( KMP_UBER_GTID( gtid )) {
6066 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006067 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006068 __kmp_global.g.g_abort = -1;
6069 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6070 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6071 return;
6072 } else {
6073 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6074 __kmp_unregister_root_current_thread( gtid );
6075 }
6076 } else {
6077 /* just a worker thread, let's leave */
6078 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6079
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006080 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006081 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006082 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006083
6084 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6085 return;
6086 }
6087 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006088 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006089 // AC: let's not shut down the Linux* OS dynamic library at the exit of the uber thread,
6090 // because we will better shutdown later in the library destructor.
6091 // The reason for this change is a performance problem when a non-openmp thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006092 // in a loop forks and joins many openmp threads. We can save a lot of time
6093 // keeping worker threads alive until the program shutdown.
6094 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6095 // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006096 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006097 return;
6098 #endif
6099 /* synchronize the termination process */
6100 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6101
6102 /* have we already finished */
6103 if( __kmp_global.g.g_abort ) {
6104 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6105 /* TODO abort? */
6106 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6107 return;
6108 }
6109 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6110 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6111 return;
6112 }
6113
6114 /* We need this lock to enforce mutual exclusion between this reading of
6115 __kmp_threads_capacity and the writing by __kmp_register_root.
6116 Alternatively, we can use a counter of roots that is
6117 atomically updated by __kmp_get_global_thread_id_reg,
6118 __kmp_do_serial_initialize and __kmp_internal_end_*.
6119 */
6120
6121 /* should we finish the run-time? are all siblings done? */
6122 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6123
6124 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6125 if ( KMP_UBER_GTID( i ) ) {
6126 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6127 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6128 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6129 return;
6130 };
6131 }
6132
6133 /* now we can safely conduct the actual termination */
6134
6135 __kmp_internal_end();
6136
6137 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6138 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6139
Jim Cownie77c2a632014-09-03 11:34:33 +00006140 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006141
6142 #ifdef DUMP_DEBUG_ON_EXIT
6143 if ( __kmp_debug_buf )
6144 __kmp_dump_debug_buffer();
6145 #endif
6146} // __kmp_internal_end_thread
6147
6148// -------------------------------------------------------------------------------------------------
6149// Library registration stuff.
6150
6151static long __kmp_registration_flag = 0;
6152 // Random value used to indicate library initialization.
6153static char * __kmp_registration_str = NULL;
6154 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6155
6156
6157static inline
6158char *
6159__kmp_reg_status_name() {
6160 /*
6161 On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
6162 If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
6163 the name of the registered_lib_env env var cannot be found, because the name will contain a different pid.
6164 */
6165 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6166} // __kmp_reg_status_name
6167
6168
6169void
6170__kmp_register_library_startup(
6171 void
6172) {
6173
6174 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6175 int done = 0;
6176 union {
6177 double dtime;
6178 long ltime;
6179 } time;
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006180 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie5e8470a2013-09-27 10:38:44 +00006181 __kmp_initialize_system_tick();
6182 #endif
6183 __kmp_read_system_time( & time.dtime );
6184 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6185 __kmp_registration_str =
6186 __kmp_str_format(
6187 "%p-%lx-%s",
6188 & __kmp_registration_flag,
6189 __kmp_registration_flag,
6190 KMP_LIBRARY_FILE
6191 );
6192
6193 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6194
6195 while ( ! done ) {
6196
6197 char * value = NULL; // Actual value of the environment variable.
6198
6199 // Set the environment variable, but do not overwrite it if it already exists.
6200 __kmp_env_set( name, __kmp_registration_str, 0 );
6201 // Check whether the variable was actually set.
6202 value = __kmp_env_get( name );
6203 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6204
6205 done = 1; // Ok, environment variable set successfully, exit the loop.
6206
6207 } else {
6208
6209 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6210 // Check whether it is alive or dead.
6211 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6212 char * tail = value;
6213 char * flag_addr_str = NULL;
6214 char * flag_val_str = NULL;
6215 char const * file_name = NULL;
6216 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6217 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6218 file_name = tail;
6219 if ( tail != NULL ) {
6220 long * flag_addr = 0;
6221 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006222 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6223 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006224 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6225 // First, check whether environment-encoded address is mapped into addr space.
6226 // If so, dereference it to see if it still has the right value.
6227
6228 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6229 neighbor = 1;
6230 } else {
6231 // If not, then we know the other copy of the library is no longer running.
6232 neighbor = 2;
6233 }; // if
6234 }; // if
6235 }; // if
6236 switch ( neighbor ) {
6237 case 0 : // Cannot parse environment variable -- neighbor status unknown.
6238 // Assume it is an incompatible format from a future version of the library.
6239 // Assume the other library is alive.
6240 // WARN( ... ); // TODO: Issue a warning.
6241 file_name = "unknown library";
6242 // Attention! Falling through to the next case. That's intentional.
6243 case 1 : { // Neighbor is alive.
6244 // Check it is allowed.
6245 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6246 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6247 // That's not allowed. Issue fatal error.
6248 __kmp_msg(
6249 kmp_ms_fatal,
6250 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6251 KMP_HNT( DuplicateLibrary ),
6252 __kmp_msg_null
6253 );
6254 }; // if
6255 KMP_INTERNAL_FREE( duplicate_ok );
6256 __kmp_duplicate_library_ok = 1;
6257 done = 1; // Exit the loop.
6258 } break;
6259 case 2 : { // Neighbor is dead.
6260 // Clear the variable and try to register library again.
6261 __kmp_env_unset( name );
6262 } break;
6263 default : {
6264 KMP_DEBUG_ASSERT( 0 );
6265 } break;
6266 }; // switch
6267
6268 }; // if
6269 KMP_INTERNAL_FREE( (void *) value );
6270
6271 }; // while
6272 KMP_INTERNAL_FREE( (void *) name );
6273
6274} // func __kmp_register_library_startup
6275
6276
6277void
6278__kmp_unregister_library( void ) {
6279
6280 char * name = __kmp_reg_status_name();
6281 char * value = __kmp_env_get( name );
6282
6283 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6284 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6285 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6286 // Ok, this is our variable. Delete it.
6287 __kmp_env_unset( name );
6288 }; // if
6289
6290 KMP_INTERNAL_FREE( __kmp_registration_str );
6291 KMP_INTERNAL_FREE( value );
6292 KMP_INTERNAL_FREE( name );
6293
6294 __kmp_registration_flag = 0;
6295 __kmp_registration_str = NULL;
6296
6297} // __kmp_unregister_library
6298
6299
6300// End of Library registration stuff.
6301// -------------------------------------------------------------------------------------------------
6302
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006303#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6304
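// Use CPUID to classify the Intel MIC (Xeon Phi) generation, if any; the result stored in
// __kmp_mic_type is used below to tune barrier branch bits and patterns.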
6305static void __kmp_check_mic_type()
6306{
6307 kmp_cpuid_t cpuid_state = {0};
6308 kmp_cpuid_t * cs_p = &cpuid_state;
Jonathan Peyton7be075332015-06-22 15:53:50 +00006309 __kmp_x86_cpuid(1, 0, cs_p);
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006310 // We don't support mic1 at the moment
6311 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6312 __kmp_mic_type = mic2;
6313 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6314 __kmp_mic_type = mic3;
6315 } else {
6316 __kmp_mic_type = non_mic;
6317 }
6318}
6319
6320#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
6321
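// One-time, process-wide initialization: register the library, create the global locks,
// set scheduling/barrier/blocktime defaults, size the thread and root arrays, and register
// the initial (uber master) root thread. Callers serialize on __kmp_initz_lock.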
Jim Cownie5e8470a2013-09-27 10:38:44 +00006322static void
6323__kmp_do_serial_initialize( void )
6324{
6325 int i, gtid;
6326 int size;
6327
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006328 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006329
6330 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6331 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6332 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6333 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6334 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6335
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006336#if OMPT_SUPPORT
6337 ompt_pre_init();
6338#endif
6339
Jim Cownie5e8470a2013-09-27 10:38:44 +00006340 __kmp_validate_locks();
6341
6342 /* Initialize internal memory allocator */
6343 __kmp_init_allocator();
6344
6345 /* Register the library startup via an environment variable
6346 and check to see whether another copy of the library is already
6347 registered. */
6348
6349 __kmp_register_library_startup( );
6350
6351 /* TODO reinitialization of library */
6352 if( TCR_4(__kmp_global.g.g_done) ) {
6353 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6354 }
6355
6356 __kmp_global.g.g_abort = 0;
6357 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6358
6359 /* initialize the locks */
6360#if KMP_USE_ADAPTIVE_LOCKS
6361#if KMP_DEBUG_ADAPTIVE_LOCKS
6362 __kmp_init_speculative_stats();
6363#endif
6364#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006365#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00006366 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006367#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006368 __kmp_init_lock( & __kmp_global_lock );
6369 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6370 __kmp_init_lock( & __kmp_debug_lock );
6371 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6372 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6373 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6374 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6375 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6376 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6377 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6378 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6379 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6380 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6381 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6382 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6383 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6384 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6385 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006386#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006387 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006388#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006389 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6390
6391 /* conduct initialization and initial setup of configuration */
6392
6393 __kmp_runtime_initialize();
6394
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006395#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6396 __kmp_check_mic_type();
6397#endif
6398
Jim Cownie5e8470a2013-09-27 10:38:44 +00006399 // Some global variable initialization moved here from kmp_env_initialize()
6400#ifdef KMP_DEBUG
6401 kmp_diag = 0;
6402#endif
6403 __kmp_abort_delay = 0;
6404
6405 // From __kmp_init_dflt_team_nth()
6406 /* assume the entire machine will be used */
6407 __kmp_dflt_team_nth_ub = __kmp_xproc;
6408 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6409 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6410 }
6411 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6412 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6413 }
6414 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006415
6416 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6417 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006418#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006419 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6420 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006421#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006422 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6423 __kmp_library = library_throughput;
6424 // From KMP_SCHEDULE initialization
6425 __kmp_static = kmp_sch_static_balanced;
6426    // AC: do not use analytical here, because it is non-monotonic
6427 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006428    //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006429 // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method
6430 // control parts
6431 #if KMP_FAST_REDUCTION_BARRIER
6432 #define kmp_reduction_barrier_gather_bb ((int)1)
6433 #define kmp_reduction_barrier_release_bb ((int)1)
6434 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6435 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6436 #endif // KMP_FAST_REDUCTION_BARRIER
6437 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6438 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6439 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6440 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6441 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6442 #if KMP_FAST_REDUCTION_BARRIER
6443 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6444 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6445 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6446 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6447 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6448 }
6449 #endif // KMP_FAST_REDUCTION_BARRIER
6450 }
6451 #if KMP_FAST_REDUCTION_BARRIER
6452 #undef kmp_reduction_barrier_release_pat
6453 #undef kmp_reduction_barrier_gather_pat
6454 #undef kmp_reduction_barrier_release_bb
6455 #undef kmp_reduction_barrier_gather_bb
6456 #endif // KMP_FAST_REDUCTION_BARRIER
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006457#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006458 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006459 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006460 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006461 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6462 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6463 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6464 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006465#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006466 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006467 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6468 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6469 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006470#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006471#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006472
6473 // From KMP_CHECKS initialization
6474#ifdef KMP_DEBUG
6475 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6476#else
6477 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6478#endif
6479
6480 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6481 __kmp_foreign_tp = TRUE;
6482
6483 __kmp_global.g.g_dynamic = FALSE;
6484 __kmp_global.g.g_dynamic_mode = dynamic_default;
6485
6486 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006487
Jim Cownie5e8470a2013-09-27 10:38:44 +00006488 // Print all messages in message catalog for testing purposes.
6489 #ifdef KMP_DEBUG
6490 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6491 if ( __kmp_str_match_true( val ) ) {
6492 kmp_str_buf_t buffer;
6493 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006494 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006495 __kmp_printf( "%s", buffer.str );
6496 __kmp_str_buf_free( & buffer );
6497 }; // if
6498 __kmp_env_free( & val );
6499 #endif
6500
Jim Cownie181b4bb2013-12-23 17:28:57 +00006501 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006502 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6503 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6504
Jim Cownie5e8470a2013-09-27 10:38:44 +00006505 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6506 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6507 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6508 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6509 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6510 __kmp_thread_pool = NULL;
6511 __kmp_thread_pool_insert_pt = NULL;
6512 __kmp_team_pool = NULL;
6513
6514 /* Allocate all of the variable sized records */
6515 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6516 /* Since allocation is cache-aligned, just add extra padding at the end */
6517 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6518 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6519 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6520
6521 /* init thread counts */
6522 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
6523 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
6524 __kmp_all_nth = 0;
6525 __kmp_nth = 0;
6526
6527 /* setup the uber master thread and hierarchy */
6528 gtid = __kmp_register_root( TRUE );
6529 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6530 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6531 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6532
6533 KMP_MB(); /* Flush all pending memory write invalidates. */
6534
6535 __kmp_common_initialize();
6536
6537 #if KMP_OS_UNIX
6538 /* invoke the child fork handler */
6539 __kmp_register_atfork();
6540 #endif
6541
Jonathan Peyton99016992015-05-26 17:32:53 +00006542 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006543 {
6544 /* Invoke the exit handler when the program finishes, only for static library.
6545 For dynamic library, we already have _fini and DllMain.
6546 */
6547 int rc = atexit( __kmp_internal_end_atexit );
6548 if ( rc != 0 ) {
6549 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6550 }; // if
6551 }
6552 #endif
6553
6554 #if KMP_HANDLE_SIGNALS
6555 #if KMP_OS_UNIX
6556 /* NOTE: make sure that this is called before the user installs
6557 * their own signal handlers so that the user handlers
6558 * are called first. this way they can return false,
6559 * not call our handler, avoid terminating the library,
6560 * and continue execution where they left off. */
6561 __kmp_install_signals( FALSE );
6562 #endif /* KMP_OS_UNIX */
6563 #if KMP_OS_WINDOWS
6564 __kmp_install_signals( TRUE );
6565 #endif /* KMP_OS_WINDOWS */
6566 #endif
6567
6568 /* we have finished the serial initialization */
6569 __kmp_init_counter ++;
6570
6571 __kmp_init_serial = TRUE;
6572
6573 if (__kmp_settings) {
6574 __kmp_env_print();
6575 }
6576
6577#if OMP_40_ENABLED
6578 if (__kmp_display_env || __kmp_display_env_verbose) {
6579 __kmp_env_print_2();
6580 }
6581#endif // OMP_40_ENABLED
6582
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006583#if OMPT_SUPPORT
6584 ompt_post_init();
6585#endif
6586
Jim Cownie5e8470a2013-09-27 10:38:44 +00006587 KMP_MB();
6588
6589 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6590}
6591
6592void
6593__kmp_serial_initialize( void )
6594{
6595 if ( __kmp_init_serial ) {
6596 return;
6597 }
6598 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6599 if ( __kmp_init_serial ) {
6600 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6601 return;
6602 }
6603 __kmp_do_serial_initialize();
6604 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6605}
6606
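// Middle initialization: set up affinity (where supported), derive the default team size
// (__kmp_dflt_team_nth) from the core or processor count, and propagate it to the nthreads-var
// of every registered root thread. Runs under __kmp_initz_lock via __kmp_middle_initialize().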
6607static void
6608__kmp_do_middle_initialize( void )
6609{
6610 int i, j;
6611 int prev_dflt_team_nth;
6612
6613 if( !__kmp_init_serial ) {
6614 __kmp_do_serial_initialize();
6615 }
6616
6617 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6618
6619 //
6620 // Save the previous value for the __kmp_dflt_team_nth so that
6621 // we can avoid some reinitialization if it hasn't changed.
6622 //
6623 prev_dflt_team_nth = __kmp_dflt_team_nth;
6624
Alp Toker98758b02014-03-02 04:12:06 +00006625#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006626 //
6627 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6628 // number of cores on the machine.
6629 //
6630 __kmp_affinity_initialize();
6631
6632 //
6633 // Run through the __kmp_threads array and set the affinity mask
6634 // for each root thread that is currently registered with the RTL.
6635 //
6636 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6637 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6638 __kmp_affinity_set_init_mask( i, TRUE );
6639 }
6640 }
Alp Toker98758b02014-03-02 04:12:06 +00006641#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006642
6643 KMP_ASSERT( __kmp_xproc > 0 );
6644 if ( __kmp_avail_proc == 0 ) {
6645 __kmp_avail_proc = __kmp_xproc;
6646 }
6647
6648 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
6649 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006650 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006651 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6652 j++;
6653 }
6654
6655 if ( __kmp_dflt_team_nth == 0 ) {
6656#ifdef KMP_DFLT_NTH_CORES
6657 //
6658 // Default #threads = #cores
6659 //
6660 __kmp_dflt_team_nth = __kmp_ncores;
6661 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6662 __kmp_dflt_team_nth ) );
6663#else
6664 //
6665 // Default #threads = #available OS procs
6666 //
6667 __kmp_dflt_team_nth = __kmp_avail_proc;
6668 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6669 __kmp_dflt_team_nth ) );
6670#endif /* KMP_DFLT_NTH_CORES */
6671 }
6672
6673 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6674 __kmp_dflt_team_nth = KMP_MIN_NTH;
6675 }
6676 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6677 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6678 }
6679
6680 //
6681 // There's no harm in continuing if the following check fails,
6682 // but it indicates an error in the previous logic.
6683 //
6684 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6685
6686 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6687 //
6688 // Run through the __kmp_threads array and set the num threads icv
6689 // for each root thread that is currently registered with the RTL
6690 // (which has not already explicitly set its nthreads-var with a
6691 // call to omp_set_num_threads()).
6692 //
6693 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6694 kmp_info_t *thread = __kmp_threads[ i ];
6695 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006696 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006697
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006698 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006699 }
6700 }
6701 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6702 __kmp_dflt_team_nth) );
6703
6704#ifdef KMP_ADJUST_BLOCKTIME
6705 /* Adjust blocktime to zero if necessary */
6706 /* now that __kmp_avail_proc is set */
6707 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6708 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6709 if ( __kmp_nth > __kmp_avail_proc ) {
6710 __kmp_zero_bt = TRUE;
6711 }
6712 }
6713#endif /* KMP_ADJUST_BLOCKTIME */
6714
6715 /* we have finished middle initialization */
6716 TCW_SYNC_4(__kmp_init_middle, TRUE);
6717
6718 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6719}
6720
6721void
6722__kmp_middle_initialize( void )
6723{
6724 if ( __kmp_init_middle ) {
6725 return;
6726 }
6727 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6728 if ( __kmp_init_middle ) {
6729 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6730 return;
6731 }
6732 __kmp_do_middle_initialize();
6733 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6734}
6735
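// Parallel initialization: performed lazily before the first parallel region. Saves the
// x87/MXCSR control words on x86, installs signal handlers if configured, initializes
// suspension support, and selects the dynamic adjustment mode.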
6736void
6737__kmp_parallel_initialize( void )
6738{
6739 int gtid = __kmp_entry_gtid(); // this might be a new root
6740
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006741    /* synchronize parallel initialization (for sibling threads) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006742 if( TCR_4(__kmp_init_parallel) ) return;
6743 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6744 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6745
6746 /* TODO reinitialization after we have already shut down */
6747 if( TCR_4(__kmp_global.g.g_done) ) {
6748 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6749 __kmp_infinite_loop();
6750 }
6751
6752 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6753 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6754 */
6755 if( !__kmp_init_middle ) {
6756 __kmp_do_middle_initialize();
6757 }
6758
6759 /* begin initialization */
6760 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6761 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6762
6763#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6764 //
6765 // Save the FP control regs.
6766 // Worker threads will set theirs to these values at thread startup.
6767 //
6768 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6769 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6770 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6771#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6772
6773#if KMP_OS_UNIX
6774# if KMP_HANDLE_SIGNALS
6775 /* must be after __kmp_serial_initialize */
6776 __kmp_install_signals( TRUE );
6777# endif
6778#endif
6779
6780 __kmp_suspend_initialize();
6781
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006782#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006783 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6784 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6785 }
6786#else
6787 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6788 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6789 }
6790#endif
6791
6792 if ( __kmp_version ) {
6793 __kmp_print_version_2();
6794 }
6795
Jim Cownie5e8470a2013-09-27 10:38:44 +00006796 /* we have finished parallel initialization */
6797 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6798
6799 KMP_MB();
6800 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6801
6802 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6803}
6804
6805
6806/* ------------------------------------------------------------------------ */
6807
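// Per-thread setup performed immediately before invoking the outlined microtask: reset the
// per-thread construct counter and this thread's dispatch buffer indices, and push the
// parallel region onto the consistency-check stack when checking is enabled.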
6808void
6809__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6810 kmp_team_t *team )
6811{
6812 kmp_disp_t *dispatch;
6813
6814 KMP_MB();
6815
6816 /* none of the threads have encountered any constructs, yet. */
6817 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006818#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006819 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006820#endif /* KMP_CACHE_MANAGE */
6821 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6822 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006823 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6824 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006825
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006826 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006827#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00006828 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6829#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006830 if( __kmp_env_consistency_check )
6831 __kmp_push_parallel( gtid, team->t.t_ident );
6832
6833 KMP_MB(); /* Flush all pending memory write invalidates. */
6834}
6835
6836void
6837__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6838 kmp_team_t *team )
6839{
6840 if( __kmp_env_consistency_check )
6841 __kmp_pop_parallel( gtid, team->t.t_ident );
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006842
6843 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006844}
6845
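// Invoke the outlined microtask for this thread's current team, bracketing the call with
// ITT stack-walking, SSC marks, and OMPT implicit-task callbacks when those are enabled.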
6846int
6847__kmp_invoke_task_func( int gtid )
6848{
6849 int rc;
6850 int tid = __kmp_tid_from_gtid( gtid );
6851 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006852 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006853
6854 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6855#if USE_ITT_BUILD
6856 if ( __itt_stack_caller_create_ptr ) {
6857 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6858 }
6859#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006860#if INCLUDE_SSC_MARKS
6861 SSC_MARK_INVOKING();
6862#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006863
6864#if OMPT_SUPPORT
6865 void *dummy;
6866 void **exit_runtime_p;
6867 ompt_task_id_t my_task_id;
6868 ompt_parallel_id_t my_parallel_id;
6869
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006870 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006871 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6872 ompt_task_info.frame.exit_runtime_frame);
6873 } else {
6874 exit_runtime_p = &dummy;
6875 }
6876
6877#if OMPT_TRACE
6878 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6879 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006880 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006881 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6882 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6883 my_parallel_id, my_task_id);
6884 }
6885#endif
6886#endif
6887
Jonathan Peyton45be4502015-08-11 21:36:41 +00006888 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006889 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6890 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006891 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6892 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006893#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006894 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006895#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006896 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006897#if OMPT_SUPPORT
6898 *exit_runtime_p = NULL;
6899#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006900 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006901
Jim Cownie5e8470a2013-09-27 10:38:44 +00006902#if USE_ITT_BUILD
6903 if ( __itt_stack_caller_create_ptr ) {
6904 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6905 }
6906#endif /* USE_ITT_BUILD */
6907 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6908
6909 return rc;
6910}
6911
6912#if OMP_40_ENABLED
6913void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006914__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006915{
6916    // This routine is called by all master threads in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006917 kmp_info_t *thr = __kmp_threads[ gtid ];
6918 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006919 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006920 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6921 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6922 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006923 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006924 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006925    // Launch the league of teams now, but do not let workers execute
6926 // (they hang on fork barrier until next parallel)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006927#if INCLUDE_SSC_MARKS
6928 SSC_MARK_FORKING();
6929#endif
6930 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006931 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006932#if OMPT_SUPPORT
6933 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6934#endif
6935 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006936 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6937 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006938#if INCLUDE_SSC_MARKS
6939 SSC_MARK_JOINING();
6940#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006941
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006942 // AC: last parameter "1" eliminates join barrier which won't work because
6943 // worker threads are in a fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006944 __kmp_join_call( loc, gtid
6945#if OMPT_SUPPORT
6946 , fork_context_intel
6947#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006948 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006949}
6950
6951int
6952__kmp_invoke_teams_master( int gtid )
6953{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006954 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6955 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006956 #if KMP_DEBUG
6957 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6958 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6959 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006960 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6961 __kmp_teams_master( gtid );
6962 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006963 return 1;
6964}
6965#endif /* OMP_40_ENABLED */
6966
6967/* this sets the requested number of threads for the next parallel region
6968 * encountered by this team */
6969/* since this should be enclosed in the forkjoin critical section it
6970 * should avoid race conditions with asymmetrical nested parallelism */
6971
6972void
6973__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6974{
6975 kmp_info_t *thr = __kmp_threads[gtid];
6976
6977 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006978 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006979}
6980
6981#if OMP_40_ENABLED
6982
6983/* this sets the requested number of teams for the teams region and/or
6984 * the number of threads for the next parallel region encountered */
6985void
6986__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6987{
6988 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006989 KMP_DEBUG_ASSERT(num_teams >= 0);
6990 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006991
6992 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006993 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006994 if( num_teams > __kmp_max_nth ) { // if too many teams requested?
6995 if ( !__kmp_reserve_warn ) {
6996 __kmp_reserve_warn = 1;
6997 __kmp_msg(
6998 kmp_ms_warning,
6999 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
7000 KMP_HNT( Unset_ALL_THREADS ),
7001 __kmp_msg_null
7002 );
7003 }
7004 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007005 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007006 // Set number of teams (number of threads in the outer "parallel" of the teams)
7007 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7008
7009 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007010 if( num_threads == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007011 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007012 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007013 num_threads = __kmp_avail_proc / num_teams;
7014 if( num_teams * num_threads > __kmp_max_nth ) {
7015 // adjust num_threads w/o warning as it is not user setting
7016 num_threads = __kmp_max_nth / num_teams;
7017 }
7018 } else {
7019 if( num_teams * num_threads > __kmp_max_nth ) {
7020 int new_threads = __kmp_max_nth / num_teams;
7021 if ( !__kmp_reserve_warn ) { // user asked for too many threads
7022 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
7023 __kmp_msg(
7024 kmp_ms_warning,
7025 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
7026 KMP_HNT( Unset_ALL_THREADS ),
7027 __kmp_msg_null
7028 );
7029 }
7030 num_threads = new_threads;
7031 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007032 }
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007033 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007034}
7035
7036
7037//
7038// Set the proc_bind var to use in the following parallel region.
7039//
7040void
7041__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
7042{
7043 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007044 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007045}
7046
7047#endif /* OMP_40_ENABLED */
7048
7049/* Launch the worker threads into the microtask. */
7050
7051void
7052__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7053{
7054 kmp_info_t *this_thr = __kmp_threads[gtid];
7055
7056#ifdef KMP_DEBUG
7057 int f;
7058#endif /* KMP_DEBUG */
7059
7060 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007061 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007062 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7063 KMP_MB(); /* Flush all pending memory write invalidates. */
7064
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007065 team->t.t_construct = 0; /* no single directives seen yet */
7066 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007067
7068 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007069 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007070 if ( team->t.t_max_nproc > 1 ) {
7071 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00007072 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007073 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007074#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007075 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7076#endif
7077 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007078 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007079 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007080#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007081 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7082#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007083 }
7084
7085 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007086 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007087
7088#ifdef KMP_DEBUG
7089 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7090 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7091 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7092 }
7093#endif /* KMP_DEBUG */
7094
7095 /* release the worker threads so they may begin working */
7096 __kmp_fork_barrier( gtid, 0 );
7097}
7098
7099
7100void
7101__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7102{
7103 kmp_info_t *this_thr = __kmp_threads[gtid];
7104
7105 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007106 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007107 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7108 KMP_MB(); /* Flush all pending memory write invalidates. */
7109
7110 /* Join barrier after fork */
7111
7112#ifdef KMP_DEBUG
7113 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7114 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7115 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7116 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7117 __kmp_print_structure();
7118 }
7119 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7120 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7121#endif /* KMP_DEBUG */
7122
7123 __kmp_join_barrier( gtid ); /* wait for everyone */
7124
7125 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007126 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007127}
7128
7129
7130/* ------------------------------------------------------------------------ */
7131/* ------------------------------------------------------------------------ */
7132
7133#ifdef USE_LOAD_BALANCE
7134
7135//
7136// Return the number of worker threads actively spinning in the hot team, if we
7137// are at the outermost level of parallelism. Otherwise, return 0.
7138//
7139static int
7140__kmp_active_hot_team_nproc( kmp_root_t *root )
7141{
7142 int i;
7143 int retval;
7144 kmp_team_t *hot_team;
7145
7146 if ( root->r.r_active ) {
7147 return 0;
7148 }
7149 hot_team = root->r.r_hot_team;
7150 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7151 return hot_team->t.t_nproc - 1; // Don't count master thread
7152 }
7153
7154 //
7155 // Skip the master thread - it is accounted for elsewhere.
7156 //
7157 retval = 0;
7158 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7159 if ( hot_team->t.t_threads[i]->th.th_active ) {
7160 retval++;
7161 }
7162 }
7163 return retval;
7164}
7165
7166//
7167// Perform an automatic adjustment to the number of
7168// threads used by the next parallel region.
7169//
7170static int
7171__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7172{
7173 int retval;
7174 int pool_active;
7175 int hot_team_active;
7176 int team_curr_active;
7177 int system_active;
7178
7179 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7180 root, set_nproc ) );
7181 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007182 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007183 KMP_DEBUG_ASSERT( set_nproc > 1 );
7184
7185 if ( set_nproc == 1) {
7186 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7187 return 1;
7188 }
7189
7190 //
7191 // Threads that are active in the thread pool, active in the hot team
7192 // for this particular root (if we are at the outer par level), and
7193 // the currently executing thread (to become the master) are available
7194 // to add to the new team, but are currently contributing to the system
7195 // load, and must be accounted for.
7196 //
7197 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7198 hot_team_active = __kmp_active_hot_team_nproc( root );
7199 team_curr_active = pool_active + hot_team_active + 1;
7200
7201 //
7202 // Check the system load.
7203 //
7204 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7205 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7206 system_active, pool_active, hot_team_active ) );
7207
7208 if ( system_active < 0 ) {
7209 //
7210 // There was an error reading the necessary info from /proc,
7211 // so use the thread limit algorithm instead. Once we set
7212 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7213 // we shouldn't wind up getting back here.
7214 //
7215 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7216 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7217
7218 //
7219 // Make this call behave like the thread limit algorithm.
7220 //
7221 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7222 : root->r.r_hot_team->t.t_nproc);
7223 if ( retval > set_nproc ) {
7224 retval = set_nproc;
7225 }
7226 if ( retval < KMP_MIN_NTH ) {
7227 retval = KMP_MIN_NTH;
7228 }
7229
7230 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7231 return retval;
7232 }
7233
7234 //
7235 // There is a slight delay in the load balance algorithm in detecting
7236 // new running procs. The real system load at this instant should be
7237    // at least as large as the number of active OMP threads that are available to
7238 // add to the team.
7239 //
7240 if ( system_active < team_curr_active ) {
7241 system_active = team_curr_active;
7242 }
7243 retval = __kmp_avail_proc - system_active + team_curr_active;
7244 if ( retval > set_nproc ) {
7245 retval = set_nproc;
7246 }
7247 if ( retval < KMP_MIN_NTH ) {
7248 retval = KMP_MIN_NTH;
7249 }
7250
7251 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7252 return retval;
7253} // __kmp_load_balance_nproc()
7254
7255#endif /* USE_LOAD_BALANCE */
7256
Jim Cownie5e8470a2013-09-27 10:38:44 +00007257/* ------------------------------------------------------------------------ */
7258/* ------------------------------------------------------------------------ */
7259
7260/* NOTE: this is called with the __kmp_init_lock held */
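// Tears down runtime state roughly in reverse order of initialization: signals, affinity and
// hierarchy data, the threads/root arrays, user locks, and the nested nth / proc-bind lists.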
7261void
7262__kmp_cleanup( void )
7263{
7264 int f;
7265
7266 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7267
7268 if (TCR_4(__kmp_init_parallel)) {
7269#if KMP_HANDLE_SIGNALS
7270 __kmp_remove_signals();
7271#endif
7272 TCW_4(__kmp_init_parallel, FALSE);
7273 }
7274
7275 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007276#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007277 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007278#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007279 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007280 TCW_4(__kmp_init_middle, FALSE);
7281 }
7282
7283 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7284
7285 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007286 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007287 __kmp_init_serial = FALSE;
7288 }
7289
7290 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7291 if ( __kmp_root[ f ] != NULL ) {
7292 __kmp_free( __kmp_root[ f ] );
7293 __kmp_root[ f ] = NULL;
7294 }
7295 }
7296 __kmp_free( __kmp_threads );
7297    // __kmp_threads and __kmp_root were allocated at once, as a single block, so there is no need
7298    // to free __kmp_root separately.
7299 __kmp_threads = NULL;
7300 __kmp_root = NULL;
7301 __kmp_threads_capacity = 0;
7302
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007303#if KMP_USE_DYNAMIC_LOCK
7304 __kmp_cleanup_indirect_user_locks();
7305#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007306 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007307#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007308
Alp Toker98758b02014-03-02 04:12:06 +00007309 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007310 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7311 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007312 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007313
7314 #if KMP_USE_ADAPTIVE_LOCKS
7315 #if KMP_DEBUG_ADAPTIVE_LOCKS
7316 __kmp_print_speculative_stats();
7317 #endif
7318 #endif
7319 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7320 __kmp_nested_nth.nth = NULL;
7321 __kmp_nested_nth.size = 0;
7322 __kmp_nested_nth.used = 0;
Jonathan Peytond0365a22017-01-18 06:40:19 +00007323 KMP_INTERNAL_FREE( __kmp_nested_proc_bind.bind_types );
7324 __kmp_nested_proc_bind.bind_types = NULL;
7325 __kmp_nested_proc_bind.size = 0;
7326 __kmp_nested_proc_bind.used = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007327
7328 __kmp_i18n_catclose();
7329
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007330#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00007331 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007332#endif
7333
Jim Cownie5e8470a2013-09-27 10:38:44 +00007334 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7335}
7336
7337/* ------------------------------------------------------------------------ */
7338/* ------------------------------------------------------------------------ */
7339
7340int
7341__kmp_ignore_mppbeg( void )
7342{
7343 char *env;
7344
7345 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7346 if (__kmp_str_match_false( env ))
7347 return FALSE;
7348 }
7349    // By default __kmpc_begin() is a no-op.
7350 return TRUE;
7351}
7352
7353int
7354__kmp_ignore_mppend( void )
7355{
7356 char *env;
7357
7358 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7359 if (__kmp_str_match_false( env ))
7360 return FALSE;
7361 }
7362    // By default __kmpc_end() is a no-op.
7363 return TRUE;
7364}
7365
7366void
7367__kmp_internal_begin( void )
7368{
7369 int gtid;
7370 kmp_root_t *root;
7371
7372 /* this is a very important step as it will register new sibling threads
7373 * and assign these new uber threads a new gtid */
7374 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007375 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007376 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7377
7378 if( root->r.r_begin ) return;
7379 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7380 if( root->r.r_begin ) {
7381 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7382 return;
7383 }
7384
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007385 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007386
7387 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7388}
7389
7390
7391/* ------------------------------------------------------------------------ */
7392/* ------------------------------------------------------------------------ */
7393
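// Apply a user-requested library mode: warn and ignore the call if made from inside a parallel
// region, otherwise adjust this thread's nthreads-var and forward the mode to
// __kmp_aux_set_library().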
7394void
7395__kmp_user_set_library (enum library_type arg)
7396{
7397 int gtid;
7398 kmp_root_t *root;
7399 kmp_info_t *thread;
7400
7401 /* first, make sure we are initialized so we can get our gtid */
7402
7403 gtid = __kmp_entry_gtid();
7404 thread = __kmp_threads[ gtid ];
7405
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007406 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007407
7408 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7409 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7410 KMP_WARNING( SetLibraryIncorrectCall );
7411 return;
7412 }
7413
7414 switch ( arg ) {
7415 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007416 thread->th.th_set_nproc = 0;
7417 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007418 break;
7419 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007420 thread->th.th_set_nproc = 0;
7421 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007422 break;
7423 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007424 thread->th.th_set_nproc = 0;
7425 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007426 break;
7427 default:
7428 KMP_FATAL( UnknownLibraryType, arg );
7429 }
7430
7431 __kmp_aux_set_library ( arg );
7432}
7433
7434void
7435__kmp_aux_set_stacksize( size_t arg )
7436{
7437 if (! __kmp_init_serial)
7438 __kmp_serial_initialize();
7439
7440#if KMP_OS_DARWIN
7441 if (arg & (0x1000 - 1)) {
7442 arg &= ~(0x1000 - 1);
7443 if(arg + 0x1000) /* check for overflow if we round up */
7444 arg += 0x1000;
7445 }
7446#endif
7447 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7448
7449 /* only change the default stacksize before the first parallel region */
7450 if (! TCR_4(__kmp_init_parallel)) {
7451 size_t value = arg; /* argument is in bytes */
7452
7453 if (value < __kmp_sys_min_stksize )
7454 value = __kmp_sys_min_stksize ;
7455 else if (value > KMP_MAX_STKSIZE)
7456 value = KMP_MAX_STKSIZE;
7457
7458 __kmp_stksize = value;
7459
7460 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7461 }
7462
7463 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7464}
7465
7466/* set the behaviour of the runtime library */
7467/* TODO this can cause some odd behaviour with sibling parallelism... */
7468void
7469__kmp_aux_set_library (enum library_type arg)
7470{
7471 __kmp_library = arg;
7472
7473 switch ( __kmp_library ) {
7474 case library_serial :
7475 {
7476 KMP_INFORM( LibraryIsSerial );
7477 (void) __kmp_change_library( TRUE );
7478 }
7479 break;
7480 case library_turnaround :
7481 (void) __kmp_change_library( TRUE );
7482 break;
7483 case library_throughput :
7484 (void) __kmp_change_library( FALSE );
7485 break;
7486 default:
7487 KMP_FATAL( UnknownLibraryType, arg );
7488 }
7489}
7490
7491/* ------------------------------------------------------------------------ */
7492/* ------------------------------------------------------------------------ */
7493
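// Set the blocktime (in milliseconds) for the calling thread's team and serial team, clamping
// the value to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME]; when the monitor thread is enabled the
// corresponding wakeup intervals are recomputed as well.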
7494void
7495__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7496{
7497 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007498#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007499 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007500#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007501 int bt_set;
7502
7503 __kmp_save_internal_controls( thread );
7504
7505 /* Normalize and set blocktime for the teams */
7506 if (blocktime < KMP_MIN_BLOCKTIME)
7507 blocktime = KMP_MIN_BLOCKTIME;
7508 else if (blocktime > KMP_MAX_BLOCKTIME)
7509 blocktime = KMP_MAX_BLOCKTIME;
7510
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007511 set__blocktime_team( thread->th.th_team, tid, blocktime );
7512 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007513
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007514#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007515 /* Calculate and set blocktime intervals for the teams */
7516 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7517
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007518 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7519 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007520#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007521
7522 /* Set whether blocktime has been set to "TRUE" */
7523 bt_set = TRUE;
7524
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007525 set__bt_set_team( thread->th.th_team, tid, bt_set );
7526 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007527#if KMP_USE_MONITOR
Samuel Antao33515192016-10-20 13:20:17 +00007528 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
7529 "bt_intervals=%d, monitor_updates=%d\n",
7530 __kmp_gtid_from_tid(tid, thread->th.th_team),
7531 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7532 __kmp_monitor_wakeups));
7533#else
7534 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7535 __kmp_gtid_from_tid(tid, thread->th.th_team),
7536 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007537#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007538}
7539
7540void
7541__kmp_aux_set_defaults(
7542 char const * str,
7543 int len
7544) {
7545 if ( ! __kmp_init_serial ) {
7546 __kmp_serial_initialize();
7547 };
7548 __kmp_env_initialize( str );
7549
7550 if (__kmp_settings
7551#if OMP_40_ENABLED
7552 || __kmp_display_env || __kmp_display_env_verbose
7553#endif // OMP_40_ENABLED
7554 ) {
7555 __kmp_env_print();
7556 }
7557} // __kmp_aux_set_defaults
7558
7559/* ------------------------------------------------------------------------ */
7560
7561/*
7562 * internal fast reduction routines
7563 */
7564
Jim Cownie5e8470a2013-09-27 10:38:44 +00007565PACKED_REDUCTION_METHOD_T
7566__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7567 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7568 kmp_critical_name *lck )
7569{
7570
7571 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7572 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7573 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
7574 // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT.
7575
7576 PACKED_REDUCTION_METHOD_T retval;
7577
7578 int team_size;
7579
7580 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7581 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7582
7583 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7584 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7585
7586 retval = critical_reduce_block;
7587
7588    team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic dereference ) is slower
7589
7590 if( team_size == 1 ) {
7591
7592 retval = empty_reduce_block;
7593
7594 } else {
7595
7596 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7597 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7598
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007599 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007600
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007601 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007602
7603 int teamsize_cutoff = 4;
7604
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007605#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7606 if( __kmp_mic_type != non_mic ) {
7607 teamsize_cutoff = 8;
7608 }
7609#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007610 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007611 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007612 if ( atomic_available ) {
7613 retval = atomic_reduce_block;
7614 }
7615 } else {
7616 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7617 }
7618 } else if ( atomic_available ) {
7619 retval = atomic_reduce_block;
7620 }
7621 #else
7622 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007623 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007624
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007625 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007626
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007627 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007628
Jim Cownie5e8470a2013-09-27 10:38:44 +00007629 // basic tuning
7630
7631 if( atomic_available ) {
7632 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7633 retval = atomic_reduce_block;
7634 }
7635 } // otherwise: use critical section
7636
7637 #elif KMP_OS_DARWIN
7638
Jim Cownie5e8470a2013-09-27 10:38:44 +00007639 if( atomic_available && ( num_vars <= 3 ) ) {
7640 retval = atomic_reduce_block;
7641 } else if( tree_available ) {
7642 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7643 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7644 }
7645 } // otherwise: use critical section
7646
7647 #else
7648 #error "Unknown or unsupported OS"
7649 #endif
7650
7651 #else
7652 #error "Unknown or unsupported architecture"
7653 #endif
7654
7655 }
7656
Jim Cownie5e8470a2013-09-27 10:38:44 +00007657 // KMP_FORCE_REDUCTION
7658
Andrey Churbanovec23a952015-08-17 10:12:12 +00007659 // If the team is serialized (team_size == 1), ignore the forced reduction
7660 // method and stay with the unsynchronized method (empty_reduce_block)
7661 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007662
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007663 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007664
7665 int atomic_available, tree_available;
7666
7667 switch( ( forced_retval = __kmp_force_reduction_method ) )
7668 {
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007669 case critical_reduce_block:
Jim Cownie5e8470a2013-09-27 10:38:44 +00007670 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007671 break;
7672
7673 case atomic_reduce_block:
7674 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007675 if( ! atomic_available ) {
7676 KMP_WARNING(RedMethodNotSupported, "atomic");
7677 forced_retval = critical_reduce_block;
7678 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007679 break;
7680
7681 case tree_reduce_block:
7682 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007683 if( ! tree_available ) {
7684 KMP_WARNING(RedMethodNotSupported, "tree");
7685 forced_retval = critical_reduce_block;
7686 } else {
7687 #if KMP_FAST_REDUCTION_BARRIER
7688 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7689 #endif
7690 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007691 break;
7692
7693 default:
7694 KMP_ASSERT( 0 ); // "unsupported method specified"
7695 }
7696
7697 retval = forced_retval;
7698 }
7699
7700 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7701
7702 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7703 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7704
7705 return ( retval );
7706}
7707
7708// this function is for testing set/get/determine reduce method
7709kmp_int32
7710__kmp_get_reduce_method( void ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007711 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007712}
7713
7714/* ------------------------------------------------------------------------ */