/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_affinity.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread. */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */

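/*
 * The lookup below supports three strategies, selected by __kmp_gtid_mode:
 *   >= 3 : read the thread-local (TDATA) variable __kmp_gtid directly;
 *   >= 2 : query the OS thread-specific-data key via __kmp_gtid_get_specific();
 *   else : scan __kmp_threads[] and match the address of a local variable
 *          against each thread's recorded stack window, since stacks grow down.
 * A minimal sketch of the last strategy (simplified; the real code below also
 * uses TCR_* volatile accessors and falls back to TLS when the scan misses):
 *
 *     char marker;                                   // lives on the caller's stack
 *     for (int i = 0; i < __kmp_threads_capacity; ++i) {
 *         kmp_info_t *thr = __kmp_threads[i];
 *         if (!thr) continue;
 *         char  *base = (char *)thr->th.th_info.ds.ds_stackbase;
 *         size_t size = thr->th.th_info.ds.ds_stacksize;
 *         if (&marker <= base && (size_t)(base - &marker) <= size)
 *             return i;                              // our frame lies inside thread i's window
 *     }
 */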
int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t   **other_threads;
    size_t         stack_data;
    char          *stack_addr;
    size_t         stack_size;
    char          *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
       parallel region, made it return KMP_GTID_DNE to force serial_initialize by
       caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
       __kmp_init_gtid for this to work. */

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    /*
        ATT: The code below is a source of potential bugs due to unsynchronized access to
        the __kmp_threads array. For example:
        1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
        2. Current thread is suspended by OS.
        3. Another thread unregisters and finishes (debug versions of free() may fill memory
           with something like 0xEF).
        4. Current thread is resumed.
        5. Current thread reads junk from *thr.
        TODO: Fix it.
        --ln
    */

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated */
                /* stack size is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

    /* if we haven't been assigned a gtid, then return the error code */
    if( i<0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
          other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}
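/*
 * Note on the refinement above: for an uber (root) thread the runtime does not
 * know the true stack extent up front, so ds_stackbase/ds_stacksize start as a
 * guess with ds_stackgrow set.  Whenever the scan misses, the window is widened
 * to cover the current stack address (base moved up, or size enlarged by the
 * difference) so that later lookups succeed without a TLS call.
 */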

int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
        /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
    }

    KMP_DEBUG_ASSERT( gtid >=0 );

    return gtid;
}

/* caller must hold forkjoin_lock */
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                            (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                            "th_%d stack (overlapped)",
                                 __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}
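/*
 * The overlap test above treats each stack as the open interval
 * (stack_beg, stack_end) and flags a conflict when either endpoint of the
 * current thread's range falls strictly inside another thread's range.
 * A standalone sketch of the same predicate (illustrative only):
 *
 *     static int stacks_overlap(char *beg, char *end, char *o_beg, char *o_end) {
 *         return (beg > o_beg && beg < o_end) || (end > o_beg && end < o_end);
 *     }
 */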


/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */

void
__kmp_infinite_loop( void )
{
    static int done = FALSE;

    while (! done) {
        KMP_YIELD( 1 );
    }
}

#define MAX_MESSAGE 512

void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];
    va_list ap;

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    int node;
    if(gtid >= 0) {
        if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
            if( __kmp_storage_map_verbose ) {
                node = __kmp_get_host_node(p1);
                if(node < 0)  /* doesn't work, so don't try this next time */
                    __kmp_storage_map_verbose = FALSE;
                else {
                    char *last;
                    int lastNode;
                    int localProc = __kmp_get_cpu_from_gtid(gtid);

                    const int page_size = KMP_GET_PAGE_SIZE();

                    p1 = (void *)( (size_t)p1 & ~((size_t)page_size - 1) );
                    p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)page_size - 1) );
                    if(localProc >= 0)
                        __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid, localProc>>1);
                    else
                        __kmp_printf_no_lock("  GTID %d\n", gtid);
# if KMP_USE_PRCTL
/* The more elaborate format is disabled for now because of the prctl hanging bug. */
                    do {
                        last = p1;
                        lastNode = node;
                        /* This loop collates adjacent pages with the same host node. */
                        do {
                            (char*)p1 += page_size;
                        } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
                                             (char*)p1 - 1, lastNode);
                    } while(p1 <= p2);
# else
                    __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                                         (char*)p1 + (page_size - 1), __kmp_get_host_node(p1));
                    if(p1 < p2)  {
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                             (char*)p2 + (page_size - 1), __kmp_get_host_node(p2));
                    }
# endif
                }
            }
        } else
            __kmp_printf_no_lock("  %s\n", KMP_I18N_STR( StorageMapWarning ) );
    }
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}
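/*
 * The masking above rounds the block [p1, p2) to page boundaries before asking
 * which NUMA node backs each page: p1 is rounded down to the start of its page
 * and p2 - 1 down to the start of the last page touched.  For example, with a
 * 4096-byte page, p1 = 0x1234 maps to 0x1000 and p2 = 0x3001 maps to 0x3000.
 * This assumes page_size is a power of two, which KMP_GET_PAGE_SIZE() provides.
 */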

void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}

void
__kmp_abort_process()
{

    // Later threads may stall here, but that's ok because abort() will kill them.
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }; // if

    if ( KMP_OS_WINDOWS ) {
        // Let other threads know of abnormal termination and prevent deadlock
        // if abort happened during library initialization or shutdown
        __kmp_global.g.g_abort = SIGABRT;

        /*
            On Windows* OS, by default abort() causes a pop-up error box, which stalls nightly
            testing. Unfortunately, we cannot reliably suppress pop-up error boxes.
            _set_abort_behavior() works well, but this function is not available in VS7 (this is
            not a problem for the DLL, but it is a problem for the static OpenMP RTL). SetErrorMode
            (and so, the timelimit utility) does not help, at least in some versions of the MS C RTL.

            It seems the following sequence is the only way to simulate abort() and avoid the
            pop-up error box.
        */
        raise( SIGABRT );
        _exit( 3 );    // Just in case, if signal ignored, exit anyway.
    } else {
        abort();
    }; // if

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

} // __kmp_abort_process

void
__kmp_abort_thread( void )
{
    // TODO: Eliminate g_abort global variable and this function.
    // In case of abort just call abort(), it will kill all the threads.
    __kmp_infinite_loop();
} // __kmp_abort_thread

/* ------------------------------------------------------------------------ */

/*
 * Print out the storage map for the major kmp_info_t thread data structures
 * that are allocated together.
 */

static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

    #if KMP_FAST_REDUCTION_BARRIER
        __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                      &thr->th.th_bar[bs_reduction_barrier+1],
                                      sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
    #endif // KMP_FAST_REDUCTION_BARRIER
}

/*
 * Print out the storage map for the major kmp_team_t team data structures
 * that are allocated together.
 */

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

    #if KMP_FAST_REDUCTION_BARRIER
        __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                      sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
    #endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    // TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // The OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
    // So, it might be safe to access __kmp_threads[] without taking the forkjoin_lock.
    // However, in fact, some threads can still be alive here, although they are about to be terminated.
    // The threads in the array with ds_thread==0 are the most suspicious.
    // Actually, it may not be safe to access __kmp_threads[] at all.

    // TODO: does it make sense to check __kmp_roots[] ?

    // Let's check that there are no other alive threads registered with the OMP lib.
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Assume that I'm alone.

    // Now it is probably safe to check and reset the locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
    #ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
    #endif // KMP_DEBUG
}

BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
    //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));

            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
            {
                // lpReserved is used for telling the difference:
                //   lpReserved == NULL when FreeLibrary() was called,
                //   lpReserved != NULL when the process terminates.
                // When FreeLibrary() is called, worker threads remain alive.
                // So they will release the forkjoin lock by themselves.
                // When the process terminates, worker threads disappear, triggering
                // the problem of an unreleased forkjoin lock as described below.

                // A worker thread can take the forkjoin lock.
                // The problem comes up if that worker thread becomes dead
                // before it releases the forkjoin lock.
                // The forkjoin lock remains taken, while the thread
                // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
                // will try to take the forkjoin lock and will always fail,
                // so that the application will never finish [normally].
                // This scenario is possible if __kmpc_end() has not been executed.
                // These are not corner cases but rather common ones:
                //   - the main function was compiled by an alternative compiler;
                //   - the main function was compiled by icl but without /Qopenmp (application with plugins);
                //   - the application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP;
                //   - an alive foreign thread prevented __kmpc_end from doing cleanup.

                // This is a hack to work around the problem.
                // TODO: !!! figure out something better.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));

            /* if we wanted to register new siblings all the time here call
             * __kmp_get_gtid(); */
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */


/* ------------------------------------------------------------------------ */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  // check whether KMP_LIBRARY=throughput (even init count)

    if (status) {
        __kmp_yield_init |= 1;  // throughput => turnaround (odd init count)
    }
    else {
        __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
    }

    return old_status;  // return previous setting of whether KMP_LIBRARY=throughput
}
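/*
 * The low bit of __kmp_yield_init doubles as the library-mode flag: an even
 * value means KMP_LIBRARY=throughput, an odd value means turnaround.  A small
 * illustration of the returned value, assuming __kmp_yield_init starts even:
 *
 *     int prev = __kmp_change_library( 1 );  // prev == 0 (was throughput), now turnaround
 *     __kmp_change_library( prev );          // restores the previous mode
 */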

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* __kmp_parallel_deo --
 * Wait until it's our turn.
 */
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_MB();
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
        KMP_MB();
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo --
 * Signal the next task.
 */

void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid =  __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        KMP_MB();       /* Flush all pending memory write invalidates.  */

        /* use the tid of the next thread in this team */
        /* TODO: replace with general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB();       /* Flush all pending memory write invalidates.  */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}
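/*
 * Together deo/dxo implement a ticket-style hand-off for the "ordered" clause:
 * t_ordered.dt.t_value holds the tid whose turn it is, each thread waits until
 * the value equals its own tid, runs its ordered chunk, then publishes
 * (tid + 1) % nproc.  A simplified equivalent of the pair, assuming a shared
 * volatile int `turn` initialized to 0:
 *
 *     while (turn != tid) { }          // __kmp_parallel_deo: wait for our turn
 *     // ... ordered region body ...
 *     turn = (tid + 1) % nproc;        // __kmp_parallel_dxo: release the next thread
 */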

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* The BARRIER for a SINGLE process section is always explicit   */

int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the
           single block
        */
        /* TODO: Should this be acquire or release? */
        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);
        }
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        {   // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}
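/*
 * How the single winner is chosen above: every thread keeps a private counter
 * th_local.this_construct of the single constructs it has encountered, and the
 * team keeps t_construct, the count of constructs already claimed.  Each thread
 * increments its private counter and then attempts a compare-and-swap of
 * t_construct from its old private value to the new one; exactly one thread's
 * CAS succeeds per construct, and that thread executes the single block.
 * Conceptually (simplified, ignoring the serialized-team fast path):
 *
 *     int mine = ++this_construct;                        // my ticket for this construct
 *     int won  = atomic_cas(&team_construct, mine - 1, mine);
 */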


/*
 * determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller.
 */
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
   int master_tid, int set_nthreads
#if OMP_40_ENABLED
  , int enter_teams
#endif /* OMP_40_ENABLED */
)
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    //
    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    //
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
          : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
        else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                  master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                  master_tid, new_nthreads ));
            }
        }
    }
    else {
        KMP_ASSERT( 0 );
    }

    //
    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    //
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        //
        // If dyn-var is false, emit a 1-time warning.
        //
        if ( ! get__dynamic_2( parent_team, master_tid )
          && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
          master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    //
    // Check if the threads array is large enough, or needs expanding.
    //
    // See comment in __kmp_register_root() about the adjustment if
    // __kmp_threads[0] == NULL.
    //
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > capacity ) {
        //
        // Expand the threads array.
        //
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            //
            // The threads array was not expanded enough.
            //
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            //
            // If dyn-var is false, emit a 1-time warning.
            //
            if ( ! get__dynamic_2( parent_team, master_tid )
              && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
                else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}
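/*
 * The recurring expression "__kmp_nth + new_nthreads - (r_active ? 1 : hot team nproc)"
 * is the projected global thread count: the master (and, for an inactive root, the
 * rest of its hot team) is already counted in __kmp_nth, so it is subtracted before
 * comparing against __kmp_max_nth or the __kmp_threads[] capacity.  For example,
 * with __kmp_nth == 4, an active root, and a request for 8 workers, the projection
 * is 4 + 8 - 1 = 11 threads.
 */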

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within critical section forkjoin */

static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int         i;
    int         use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
    KMP_MB();

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1; // index in array of hot teams
        if( master_th->th.th_teams_microtask ) {            // are we inside the teams?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level; // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level; // level was not increased in teams construct for team_of_workers before the parallel
            }            // team->t.t_level will be increased inside parallel
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1; // the team is ready to use
            } else {
                use_hot_team = 0; // AC: threads are not allocated yet
                hot_teams[level].hot_team = team; // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        } else {
            use_hot_team = 0;
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ;  i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;
#endif
            { // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived        = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }; // for b
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif

    }

    KMP_MB();
}
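/*
 * "Hot" teams are kept warm between parallel regions so that worker threads and
 * team structures are reused instead of re-forked.  With KMP_NESTED_HOT_TEAMS the
 * master carries an array of hot teams indexed by nesting level (bounded by
 * __kmp_hot_teams_max_level); only when no cached team exists for the level does
 * the loop above pull or spawn workers via __kmp_allocate_thread().
 */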

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
//
// Propagate any changes to the floating point control registers out to the team.
// We try to avoid unnecessary writes to the relevant cache line in the team structure,
// so we don't make changes unless they are needed.
//
inline static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        // There is no point looking at t_fp_control_saved here.
        // If it is TRUE, we still have to update the values if they are different from those we now have.
        // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
        // that the values in the team are the same as those we have.
        // So, this code achieves what we need whether or not t_fp_control_saved is true.
        // By checking whether the value needs updating we avoid unnecessary writes that would put the
        // cache-line into a written state, causing all threads in the team to have to read it again.
        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
        // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
        // So we must ensure it is correct.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    }
    else {
        // Similarly here. Don't write to this cache-line in the team structure unless we have to.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
    }
}

// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed in the team
        // during the parallel region that we are exiting.
        //
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
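/*
 * KMP_CHECK_UPDATE is the write-avoidance idiom referred to above: store only
 * when the new value differs, so the shared cache line is not dirtied needlessly.
 * A minimal sketch of the pattern (the actual macro is defined elsewhere in the
 * runtime headers):
 *
 *     #define CHECK_UPDATE_SKETCH(dst, val)  if ((dst) != (val)) (dst) = (val)
 */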

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration

/*
 * Run a parallel region that has been serialized, so it runs only in a team of the single master thread.
 */
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    /* Skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr     = __kmp_threads[ global_tid ];
    serial_team  = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        //
        // No proc_bind clause was specified, so use the current value
        // of proc-bind-var for this parallel region.
        //
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    //
    // Reset for next parallel region
    //
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used
             * TODO increase performance by making these locks more specific */
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                           ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                           proc_bind,
#endif
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );


            /* TODO the above breaks the requirement that if we run out of
             * resources, then we can still guarantee that serialized teams
             * are ok, since we may need to allocate a new one */
        } else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident         = loc;
        serial_team->t.t_serialized    = 1;
        serial_team->t.t_nproc         = 1;
        serial_team->t.t_parent        = this_thr->th.th_team;
        serial_team->t.t_sched         = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team           = serial_team;
        serial_team->t.t_master_tid    = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
           each serialized task represented by team->t.t_serialized? */
        copy_icvs(
            & this_thr->th.th_current_task->td_icvs,
            & this_thr->th.th_current_task->td_parent->td_icvs );

        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }

#if OMP_40_ENABLED
        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];
        }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
        serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
#endif
        this_thr->th.th_info.ds.ds_tid = 0;

        /* set thread cache values */
        this_thr->th.th_team_nproc      = 1;
        this_thr->th.th_team_master     = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level        = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl (serial_team);

        /* check if we need to allocate dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

        KMP_MB();

    } else {
        /* this serialized team is already being used,
         * that's fine, just add another nested level */
        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;
        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        /* allocate/push dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        {
            dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
            disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
            serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        KMP_MB();
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

}
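/*
 * A serialized parallel region reuses the thread's cached serial team rather
 * than forking: the first serialized level installs (or allocates) the serial
 * team and copies the ICVs, while deeper nesting just bumps t_serialized /
 * t_level and pushes another dispatch buffer so nested worksharing constructs
 * still get private state.  The matching end-of-serialized-parallel path is
 * expected to pop these in reverse order.
 */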
Jim Cownie181b4bb2013-12-23 17:28:57 +00001378
Jim Cownie5e8470a2013-09-27 10:38:44 +00001379/* most of the work for a fork */
1380/* return true if we really went parallel, false if serialized */
1381int
1382__kmp_fork_call(
1383 ident_t * loc,
1384 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001385 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001386 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001387#if OMPT_SUPPORT
1388 void *unwrapped_task,
1389#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001390 microtask_t microtask,
1391 launch_t invoker,
1392/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001393#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001394 va_list * ap
1395#else
1396 va_list ap
1397#endif
1398 )
1399{
1400 void **argv;
1401 int i;
1402 int master_tid;
1403 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001404 kmp_team_t *team;
1405 kmp_team_t *parent_team;
1406 kmp_info_t *master_th;
1407 kmp_root_t *root;
1408 int nthreads;
1409 int master_active;
1410 int master_set_numthreads;
1411 int level;
1412#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001413 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001414 int teams_level;
1415#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001416#if KMP_NESTED_HOT_TEAMS
1417 kmp_hot_team_ptr_t **p_hot_teams;
1418#endif
1419 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001420 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001421 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001422
1423 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001424 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
 1425 /* Some systems prefer the stack for the root thread(s) to start with
 1426 some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001427 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001428 /* These 2 lines below are so this does not get optimized out */
1429 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1430 __kmp_stkpadding += (short)((kmp_int64)dummy);
1431 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001432
1433 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001434 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001435 if( ! TCR_4(__kmp_init_parallel) )
1436 __kmp_parallel_initialize();
1437
1438 /* setup current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001439 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1440 parent_team = master_th->th.th_team;
1441 master_tid = master_th->th.th_info.ds.ds_tid;
1442 master_this_cons = master_th->th.th_local.this_construct;
1443 root = master_th->th.th_root;
1444 master_active = root->r.r_active;
1445 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001446
1447#if OMPT_SUPPORT
1448 ompt_parallel_id_t ompt_parallel_id;
1449 ompt_task_id_t ompt_task_id;
1450 ompt_frame_t *ompt_frame;
1451 ompt_task_id_t my_task_id;
1452 ompt_parallel_id_t my_parallel_id;
1453
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001454 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001455 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1456 ompt_task_id = __ompt_get_task_id_internal(0);
1457 ompt_frame = __ompt_get_task_frame_internal(0);
1458 }
1459#endif
1460
Jim Cownie5e8470a2013-09-27 10:38:44 +00001461 // Nested level will be an index in the nested nthreads array
1462 level = parent_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001463 active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001464#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001465 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1466#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001467#if KMP_NESTED_HOT_TEAMS
1468 p_hot_teams = &master_th->th.th_hot_teams;
1469 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1470 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1471 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1472 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
 1473 (*p_hot_teams)[0].hot_team_nth = 1; // this is either the actual value or not needed (when active_level > 0)
1474 }
1475#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001476
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001477#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001478 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001479 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1480 int team_size = master_set_numthreads;
1481
1482 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1483 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001484 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001485 }
1486#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001487
Jim Cownie5e8470a2013-09-27 10:38:44 +00001488 master_th->th.th_ident = loc;
1489
1490#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001491 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
 1493 // AC: This is the start of a parallel region nested inside a teams construct.
 1494 // The team is actual (hot); all workers are ready at the fork barrier.
 1495 // No lock is needed to initialize the team a bit, then release the workers.
1496 parent_team->t.t_ident = loc;
Jonathan Peyton7cf08d42016-06-16 18:47:38 +00001497 __kmp_alloc_argv_entries( argc, parent_team, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001498 parent_team->t.t_argc = argc;
1499 argv = (void**)parent_team->t.t_argv;
1500 for( i=argc-1; i >= 0; --i )
1501/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001502#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001503 *argv++ = va_arg( *ap, void * );
1504#else
1505 *argv++ = va_arg( ap, void * );
1506#endif
1507 /* Increment our nested depth levels, but not increase the serialization */
1508 if ( parent_team == master_th->th.th_serial_team ) {
1509 // AC: we are in serialized parallel
1510 __kmpc_serialized_parallel(loc, gtid);
1511 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
 1512 parent_team->t.t_serialized--; // AC: need this so that enquiry functions
 1513 // work correctly; will restore at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001514
1515#if OMPT_SUPPORT
1516 void *dummy;
1517 void **exit_runtime_p;
1518
1519 ompt_lw_taskteam_t lw_taskteam;
1520
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001521 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001522 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1523 unwrapped_task, ompt_parallel_id);
1524 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1525 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1526
1527 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1528
1529#if OMPT_TRACE
1530 /* OMPT implicit task begin */
1531 my_task_id = lw_taskteam.ompt_task_info.task_id;
1532 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001533 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001534 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1535 my_parallel_id, my_task_id);
1536 }
1537#endif
1538
1539 /* OMPT state */
1540 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1541 } else {
1542 exit_runtime_p = &dummy;
1543 }
1544#endif
1545
Jonathan Peyton45be4502015-08-11 21:36:41 +00001546 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001547 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1548 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001549 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001550#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001551 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001552#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001553 );
1554 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001555
1556#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001557 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001558 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001559#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001560 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001561
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001562 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001563 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1564 ompt_parallel_id, ompt_task_id);
1565 }
1566
1567 __ompt_lw_taskteam_unlink(master_th);
 1568 // clear the task id only after unlinking the task
1569 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1570#endif
1571
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001572 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001573 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001574 ompt_parallel_id, ompt_task_id,
1575 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001576 }
1577 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1578 }
1579#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001580 return TRUE;
1581 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001582
Jim Cownie5e8470a2013-09-27 10:38:44 +00001583 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001584#if OMPT_SUPPORT
1585 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1586#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001587 parent_team->t.t_invoke = invoker;
1588 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1589 parent_team->t.t_active_level ++;
1590 parent_team->t.t_level ++;
1591
1592 /* Change number of threads in the team if requested */
1593 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001594 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001595 // AC: can only reduce the number of threads dynamically, cannot increase
1596 kmp_info_t **other_threads = parent_team->t.t_threads;
1597 parent_team->t.t_nproc = master_set_numthreads;
1598 for ( i = 0; i < master_set_numthreads; ++i ) {
1599 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1600 }
 1601 // Keep extra threads hot in the team for a possible next parallel region
1602 }
1603 master_th->th.th_set_nproc = 0;
1604 }
1605
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001606#if USE_DEBUGGER
1607 if ( __kmp_debugging ) { // Let debugger override number of threads.
1608 int nth = __kmp_omp_num_threads( loc );
1609 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1610 master_set_numthreads = nth;
1611 }; // if
1612 }; // if
1613#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001614
1615 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1616 __kmp_internal_fork( loc, gtid, parent_team );
1617 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1618
1619 /* Invoke microtask for MASTER thread */
1620 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1621 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1622
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001623 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001624 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1625 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001626 if (! parent_team->t.t_invoke( gtid )) {
1627 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1628 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001629 }
1630 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1631 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1632 KMP_MB(); /* Flush all pending memory write invalidates. */
1633
1634 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1635
1636 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001637 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001638#endif /* OMP_40_ENABLED */
1639
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001640#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001641 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001642 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001644#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645
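    // Decide how many threads this parallel region gets: fall back to a single thread once the
    // maximum number of active levels is reached, otherwise honor the num_threads request / nproc
    // setting and let __kmp_reserve_threads() cap it against the available thread pool.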
Jim Cownie5e8470a2013-09-27 10:38:44 +00001646 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
1647 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001648 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001649#if OMP_40_ENABLED
1650 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1651#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001652 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001653 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001654
 1655 // Check whether we need to take the forkjoin lock (no need for a serialized parallel outside of a teams construct).
 1656 // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallel regions.
1657 if (nthreads > 1) {
1658 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1659#if OMP_40_ENABLED
1660 && !enter_teams
1661#endif /* OMP_40_ENABLED */
1662 ) ) || ( __kmp_library == library_serial ) ) {
1663 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1664 gtid, nthreads ));
1665 nthreads = 1;
1666 }
1667 }
1668 if ( nthreads > 1 ) {
1669 /* determine how many new threads we can use */
1670 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1671
1672 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001673#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001674/* AC: If we execute teams from a parallel region (on the host), then the teams should be created,
 1675 but each can have only 1 thread if nesting is disabled. If teams is called from a serial region,
 1676 then the teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001677 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001678#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001679 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001680 if ( nthreads == 1 ) {
1681 // Free lock for single thread execution here;
1682 // for multi-thread execution it will be freed later
1683 // after team of threads created and initialized
1684 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1685 }
1686 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001687 }
1688 KMP_DEBUG_ASSERT( nthreads > 0 );
1689
1690 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001691 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001692
Jim Cownie5e8470a2013-09-27 10:38:44 +00001693 /* create a serialized parallel region? */
1694 if ( nthreads == 1 ) {
1695 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001696#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001697 void * args[ argc ];
1698#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001699 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001700#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001701
Jim Cownie5e8470a2013-09-27 10:38:44 +00001702 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1703
1704 __kmpc_serialized_parallel(loc, gtid);
1705
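    // Serialized path: for the Intel entry point the microtask is invoked right here on the
    // serial team; for the GNU entry point only OMPT state is recorded and FALSE is returned
    // so the caller invokes the microtask itself.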
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001706 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001708 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709#if OMP_40_ENABLED
1710 if ( !ap ) {
1711 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001712 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001713 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001714
1715#if OMPT_SUPPORT
1716 void *dummy;
1717 void **exit_runtime_p;
1718
1719 ompt_lw_taskteam_t lw_taskteam;
1720
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001721 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001722 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1723 unwrapped_task, ompt_parallel_id);
1724 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1725 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1726
1727 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1728
1729#if OMPT_TRACE
1730 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001731 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001732 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1733 ompt_parallel_id, my_task_id);
1734 }
1735#endif
1736
1737 /* OMPT state */
1738 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1739 } else {
1740 exit_runtime_p = &dummy;
1741 }
1742#endif
1743
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001744 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001745 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1746 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001747 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1748#if OMPT_SUPPORT
1749 , exit_runtime_p
1750#endif
1751 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001752 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001753
1754#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001755 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001756 if (ompt_enabled) {
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001757 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001758
1759#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001760 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001761 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1762 ompt_parallel_id, ompt_task_id);
1763 }
1764#endif
1765
1766 __ompt_lw_taskteam_unlink(master_th);
 1767 // clear the task id only after unlinking the task
1768 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1769
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001770 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001771 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001772 ompt_parallel_id, ompt_task_id,
1773 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001774 }
1775 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1776 }
1777#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001778 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
1779 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1780 team = master_th->th.th_team;
1781 //team->t.t_pkfn = microtask;
1782 team->t.t_invoke = invoker;
1783 __kmp_alloc_argv_entries( argc, team, TRUE );
1784 team->t.t_argc = argc;
1785 argv = (void**) team->t.t_argv;
1786 if ( ap ) {
1787 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001788// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001789# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001790 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001791# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001792 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001793# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001794 } else {
1795 for( i=0; i < argc; ++i )
1796 // Get args from parent team for teams construct
1797 argv[i] = parent_team->t.t_argv[i];
1798 }
1799 // AC: revert change made in __kmpc_serialized_parallel()
1800 // because initial code in teams should have level=0
1801 team->t.t_level--;
1802 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001803 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001804 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1805 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001806 invoker(gtid);
1807 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001808 } else {
1809#endif /* OMP_40_ENABLED */
1810 argv = args;
1811 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001812// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001813#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001814 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001815#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001816 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001817#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001818 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001819
1820#if OMPT_SUPPORT
1821 void *dummy;
1822 void **exit_runtime_p;
1823
1824 ompt_lw_taskteam_t lw_taskteam;
1825
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001826 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001827 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1828 unwrapped_task, ompt_parallel_id);
1829 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1830 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1831
1832 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1833
1834#if OMPT_TRACE
1835 /* OMPT implicit task begin */
1836 my_task_id = lw_taskteam.ompt_task_info.task_id;
1837 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001838 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001839 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1840 my_parallel_id, my_task_id);
1841 }
1842#endif
1843
1844 /* OMPT state */
1845 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1846 } else {
1847 exit_runtime_p = &dummy;
1848 }
1849#endif
1850
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001851 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001852 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1853 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001854 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1855#if OMPT_SUPPORT
1856 , exit_runtime_p
1857#endif
1858 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001859 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001860
1861#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001862 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001863 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001864#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001865 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001866
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001867 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001868 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1869 my_parallel_id, my_task_id);
1870 }
1871#endif
1872
1873 __ompt_lw_taskteam_unlink(master_th);
 1874 // clear the task id only after unlinking the task
1875 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1876
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001877 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001878 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001879 ompt_parallel_id, ompt_task_id,
1880 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001881 }
1882 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1883 }
1884#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001885#if OMP_40_ENABLED
1886 }
1887#endif /* OMP_40_ENABLED */
1888 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001889 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001890#if OMPT_SUPPORT
1891 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1892 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1893 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1894 unwrapped_task, ompt_parallel_id);
1895
1896 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001897 lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001898 __ompt_lw_taskteam_link(lwt, master_th);
1899#endif
1900
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001901 // we were called from GNU native code
1902 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1903 return FALSE;
1904 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001905 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001906 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907 }
1908
Jim Cownie5e8470a2013-09-27 10:38:44 +00001909
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001910 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 KMP_MB();
1912 return FALSE;
1913 }
1914
Jim Cownie5e8470a2013-09-27 10:38:44 +00001915 // GEH: only modify the executing flag in the case when not serialized
1916 // serialized case is handled in kmpc_serialized_parallel
1917 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001918 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1919 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001920 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1921 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1922 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001923
1924#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001925 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001926#endif /* OMP_40_ENABLED */
1927 {
1928 /* Increment our nested depth level */
1929 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1930 }
1931
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001934 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1935 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001936 }
1937 else {
1938 nthreads_icv = 0; // don't update
1939 }
1940
1941#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001942 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001944 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001945 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1946 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001947 }
1948 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001949 if (proc_bind == proc_bind_default) {
1950 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1951 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001952 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001953 /* else: The proc_bind policy was specified explicitly on the parallel clause. This
1954 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001956 if ((level+1 < __kmp_nested_proc_bind.used)
1957 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1958 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001959 }
1960 }
1961
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001963 master_th->th.th_set_proc_bind = proc_bind_default;
1964#endif /* OMP_40_ENABLED */
1965
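    // Allocate the new team; pass a private ICV copy only when the nested nthreads or proc-bind
    // value differs from the master's current ICVs, otherwise reuse the master's ICVs directly.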
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001966 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001968 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001972 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001973 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001974 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001975 new_icvs.nproc = nthreads_icv;
1976 }
1977
1978#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001979 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001980 new_icvs.proc_bind = proc_bind_icv;
1981 }
1982#endif /* OMP_40_ENABLED */
1983
1984 /* allocate a new parallel team */
1985 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1986 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001987#if OMPT_SUPPORT
1988 ompt_parallel_id,
1989#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001990#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001991 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001993 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1994 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995 /* allocate a new parallel team */
1996 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1997 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001998#if OMPT_SUPPORT
1999 ompt_parallel_id,
2000#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002001#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002002 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002003#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002004 &master_th->th.th_current_task->td_icvs, argc
2005 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002007 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008
2009 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002010 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2011 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2012 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2013 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2014 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002015#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002016 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002017#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002018 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002019 // TODO: parent_team->t.t_level == INT_MAX ???
2020#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002021 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002023 int new_level = parent_team->t.t_level + 1;
2024 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2025 new_level = parent_team->t.t_active_level + 1;
2026 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027#if OMP_40_ENABLED
2028 } else {
2029 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002030 int new_level = parent_team->t.t_level;
2031 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2032 new_level = parent_team->t.t_active_level;
2033 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002034 }
2035#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002036 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton6b560f02016-07-01 17:54:32 +00002037 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002038 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002039
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002040#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002041 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002042#endif
2043
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002044 // Update the floating point rounding in the team if required.
2045 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046
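    // With tasking enabled, remember the master's task state on its per-thread memo stack
    // (the stack is grown by doubling when full) so it can be restored when this region joins.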
2047 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002048 // Set master's task team to team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002049#if 0
2050 // Patch out an assertion that trips while the runtime seems to operate correctly.
2051 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002052 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002053#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002054 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002055 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002056 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002057
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002058 if ( active_level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002059 // Take a memo of master's task_state
2060 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2061 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002062 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2063 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002064 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002065 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002066 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2067 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2068 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002069 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2070 new_stack[i] = 0;
2071 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002072 old_stack = master_th->th.th_task_state_memo_stack;
2073 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002074 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002075 __kmp_free(old_stack);
2076 }
2077 // Store master's task_state on stack
2078 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2079 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002080#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002081 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
Jonathan Peyton54127982015-11-04 21:37:48 +00002082 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2083 }
2084 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002085#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002086 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002087#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002088 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002089#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002090 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002091#if !KMP_NESTED_HOT_TEAMS
2092 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2093#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002094 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002095
2096 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2097 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2098 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2099 ( team->t.t_master_tid == 0 &&
2100 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2101 KMP_MB();
2102
2103 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002104 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002105#if OMP_40_ENABLED
2106 if ( ap ) {
2107#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002108 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002109// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002110#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002111 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002112#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002113 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002115 KMP_CHECK_UPDATE(*argv, new_argv);
2116 argv++;
2117 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002118#if OMP_40_ENABLED
2119 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002120 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002121 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002122 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2123 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002124 }
2125#endif /* OMP_40_ENABLED */
2126
2127 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002128 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002129 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2130 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002131
2132 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002133 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002135#if OMPT_SUPPORT
2136 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2137#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002138
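    // The forkjoin lock acquired in the multi-thread path above is held until the new team is
    // fully set up; release it before the region starts executing.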
2139 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2140
Jim Cownie5e8470a2013-09-27 10:38:44 +00002141#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002142 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002143# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002144 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002145# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002146 ) {
2147#if USE_ITT_NOTIFY
2148 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2149 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002150 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002151 kmp_uint64 tmp_time = 0;
2152 if ( __itt_get_timestamp_ptr )
2153 tmp_time = __itt_get_timestamp();
2154 // Internal fork - report frame begin
2155 master_th->th.th_frame_time = tmp_time;
2156 if ( __kmp_forkjoin_frames_mode == 3 )
2157 team->t.t_region_time = tmp_time;
2158 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2159#endif /* USE_ITT_NOTIFY */
2160 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2161 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2162 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002163 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2164 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002165 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002166#endif /* USE_ITT_BUILD */
2167
2168 /* now go on and do the work */
2169 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2170 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002171 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2172 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002173
2174#if USE_ITT_BUILD
2175 if ( __itt_stack_caller_create_ptr ) {
2176 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2177 }
2178#endif /* USE_ITT_BUILD */
2179
2180#if OMP_40_ENABLED
 2181 if ( ap ) // AC: skip __kmp_internal_fork for the teams construct; let only master threads execute
2182#endif /* OMP_40_ENABLED */
2183 {
2184 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002185 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2186 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002187 }
2188
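    // For the GNU entry point the caller invokes the microtask, so return now; for the Intel
    // entry point the microtask is invoked below on the master thread.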
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002189 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002190 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2191 return TRUE;
2192 }
2193
2194 /* Invoke microtask for MASTER thread */
2195 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2196 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002197 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002198
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002199 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002200 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2201 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002202 if (! team->t.t_invoke( gtid )) {
2203 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2204 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002205 }
2206 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2207 gtid, team->t.t_id, team->t.t_pkfn ) );
2208 KMP_MB(); /* Flush all pending memory write invalidates. */
2209
2210 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2211
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002212#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002213 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002214 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2215 }
2216#endif
2217
Jim Cownie5e8470a2013-09-27 10:38:44 +00002218 return TRUE;
2219}
2220
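// Helpers used at join time to report the OMPT parallel-end event and restore the thread's OMPT state.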
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002221#if OMPT_SUPPORT
2222static inline void
2223__kmp_join_restore_state(
2224 kmp_info_t *thread,
2225 kmp_team_t *team)
2226{
2227 // restore state outside the region
2228 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2229 ompt_state_work_serial : ompt_state_work_parallel);
2230}
2231
2232static inline void
2233__kmp_join_ompt(
2234 kmp_info_t *thread,
2235 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002236 ompt_parallel_id_t parallel_id,
2237 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002238{
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002239 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002240 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002241 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002242 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002243 }
2244
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002245 task_info->frame.reenter_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002246 __kmp_join_restore_state(thread,team);
2247}
2248#endif
2249
Jim Cownie5e8470a2013-09-27 10:38:44 +00002250void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002251__kmp_join_call(ident_t *loc, int gtid
2252#if OMPT_SUPPORT
2253 , enum fork_context_e fork_context
2254#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002255#if OMP_40_ENABLED
2256 , int exit_teams
2257#endif /* OMP_40_ENABLED */
2258)
2259{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00002260 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002261 kmp_team_t *team;
2262 kmp_team_t *parent_team;
2263 kmp_info_t *master_th;
2264 kmp_root_t *root;
2265 int master_active;
2266 int i;
2267
2268 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2269
2270 /* setup current data */
2271 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002272 root = master_th->th.th_root;
2273 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002274 parent_team = team->t.t_parent;
2275
2276 master_th->th.th_ident = loc;
2277
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002278#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002279 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002280 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2281 }
2282#endif
2283
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002284#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00002285 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2286 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2287 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002288 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2289 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002290 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002291#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292
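    // A serialized team unwinds through __kmpc_end_serialized_parallel (with level adjustments
    // when inside a teams construct) and returns without a join barrier.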
2293 if( team->t.t_serialized ) {
2294#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002295 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002296 // We are in teams construct
2297 int level = team->t.t_level;
2298 int tlevel = master_th->th.th_teams_level;
2299 if ( level == tlevel ) {
2300 // AC: we haven't incremented it earlier at start of teams construct,
2301 // so do it here - at the end of teams construct
2302 team->t.t_level++;
2303 } else if ( level == tlevel + 1 ) {
2304 // AC: we are exiting parallel inside teams, need to increment serialization
2305 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2306 team->t.t_serialized++;
2307 }
2308 }
2309#endif /* OMP_40_ENABLED */
2310 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002311
2312#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002313 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002314 __kmp_join_restore_state(master_th, parent_team);
2315 }
2316#endif
2317
Jim Cownie5e8470a2013-09-27 10:38:44 +00002318 return;
2319 }
2320
2321 master_active = team->t.t_master_active;
2322
2323#if OMP_40_ENABLED
2324 if (!exit_teams)
2325#endif /* OMP_40_ENABLED */
2326 {
2327 // AC: No barrier for internal teams at exit from teams construct.
2328 // But there is barrier for external team (league).
2329 __kmp_internal_join( loc, gtid, team );
2330 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002331#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002332 else {
 2333 master_th->th.th_task_state = 0; // AC: no tasking in teams (outside of any parallel region)
2334 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002335#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002336
Jim Cownie5e8470a2013-09-27 10:38:44 +00002337 KMP_MB();
2338
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002339#if OMPT_SUPPORT
2340 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2341#endif
2342
Jim Cownie5e8470a2013-09-27 10:38:44 +00002343#if USE_ITT_BUILD
2344 if ( __itt_stack_caller_create_ptr ) {
2345 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2346 }
2347
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002348 // Mark end of "parallel" region for VTune.
2349 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002350# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002351 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002353 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002354 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002355 // only one notification scheme (either "submit" or "forking/joined", not both)
2356 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2357 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2358 0, loc, master_th->th.th_team_nproc, 1 );
2359 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2360 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2361 __kmp_itt_region_joined( gtid );
2362 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002363#endif /* USE_ITT_BUILD */
2364
2365#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002366 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002367 !exit_teams &&
2368 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2369 team->t.t_level == master_th->th.th_teams_level + 1 ) {
 2370 // AC: We need to leave the team structure intact at the end of a
 2371 // parallel region inside the teams construct, so that the same (hot)
 2372 // team works at the next parallel region; only adjust the nesting levels.
2373
2374 /* Decrement our nested depth level */
2375 team->t.t_level --;
2376 team->t.t_active_level --;
2377 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2378
2379 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002380 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002381 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002382 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002383 kmp_info_t **other_threads = team->t.t_threads;
2384 team->t.t_nproc = new_num;
2385 for ( i = 0; i < old_num; ++i ) {
2386 other_threads[i]->th.th_team_nproc = new_num;
2387 }
 2388 // Adjust the states of the unused threads of the team
2389 for ( i = old_num; i < new_num; ++i ) {
2390 // Re-initialize thread's barrier data.
2391 int b;
2392 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002393 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002394 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002395 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002396#if USE_DEBUGGER
2397 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2398#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002399 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002400 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2401 // Synchronize thread's task state
2402 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2403 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002404 }
2405 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002406
2407#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002408 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002409 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002410 }
2411#endif
2412
Jim Cownie5e8470a2013-09-27 10:38:44 +00002413 return;
2414 }
2415#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002416
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002417 /* do cleanup and restore the parent team */
 2418 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2419 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2420
2421 master_th->th.th_dispatch =
2422 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002423
2424 /* jc: The following lock has instructions with REL and ACQ semantics,
2425 separating the parallel user code called in this parallel region
2426 from the serial user code called after this function returns.
2427 */
2428 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2429
2430#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002431 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002432#endif /* OMP_40_ENABLED */
2433 {
2434 /* Decrement our nested depth level */
2435 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2436 }
2437 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2438
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002439#if OMPT_SUPPORT && OMPT_TRACE
 2440 if (ompt_enabled) {
2441 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2442 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2443 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2444 parallel_id, task_info->task_id);
2445 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00002446 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002447 task_info->task_id = 0;
2448 }
2449#endif
2450
Jim Cownie5e8470a2013-09-27 10:38:44 +00002451 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2452 0, master_th, team ) );
2453 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002454
Alp Toker98758b02014-03-02 04:12:06 +00002455#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002456 //
2457 // Restore master thread's partition.
2458 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002459 master_th->th.th_first_place = team->t.t_first_place;
2460 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002461#endif /* OMP_40_ENABLED */
2462
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002463 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002464
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002465 if ( root->r.r_active != master_active )
2466 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002467
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002468 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002469
2470 /* this race was fun to find. make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002471 * region otherwise assertions may fail occasionally since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002472 * may be reallocated and the hierarchy appears inconsistent. it is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002473 * actually safe to run and won't cause any bugs, but will cause those
Jim Cownie5e8470a2013-09-27 10:38:44 +00002474 * assertion failures. it's only one deref&assign so might as well put this
2475 * in the critical region */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002476 master_th->th.th_team = parent_team;
2477 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2478 master_th->th.th_team_master = parent_team->t.t_threads[0];
2479 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002480
2481 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002482 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002483 parent_team != master_th->th.th_serial_team &&
2484 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002485 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2486 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002487 }
2488
Jim Cownie5e8470a2013-09-27 10:38:44 +00002489 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002490 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2491 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2492 // Remember master's state if we re-use this nested hot team
2493 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002494 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002495 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002496 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002497 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002498 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002499 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002500 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002501 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002502 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002503
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002504 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2505 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2506 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002507
2508 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2509
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002510#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002511 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002512 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002513 }
2514#endif
2515
Jim Cownie5e8470a2013-09-27 10:38:44 +00002516 KMP_MB();
2517 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2518}
2519
2520/* ------------------------------------------------------------------------ */
2521/* ------------------------------------------------------------------------ */
2522
2523/* Check whether we should push an internal control record onto the
2524 serial team stack. If so, do it. */
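// A record is pushed only when the thread is currently running on its serial team
// with a serialized nesting depth greater than one, and only if the top of the stack
// does not already correspond to that nesting level (see the checks below).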
2525void
2526__kmp_save_internal_controls ( kmp_info_t * thread )
2527{
2528
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002529 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002530 return;
2531 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002532 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002533 int push = 0;
2534
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002535 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002536 push = 1;
2537 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002538 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2539 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002540 push = 1;
2541 }
2542 }
2543 if (push) { /* push a record on the serial team's stack */
2544 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2545
Jim Cownie5e8470a2013-09-27 10:38:44 +00002546 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002547
2548 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2549
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002550 control->next = thread->th.th_team->t.t_control_stack_top;
2551 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002552 }
2553 }
2554}
2555
2556/* Changes set_nproc */
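// Backs the omp_set_num_threads() API: the request is clamped to [1, __kmp_max_nth]
// (e.g. a request of 0 is raised to 1) and stored in the calling thread's nproc ICV;
// the hot team may also be shrunk immediately, as described below.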
2557void
2558__kmp_set_num_threads( int new_nth, int gtid )
2559{
2560 kmp_info_t *thread;
2561 kmp_root_t *root;
2562
2563 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2564 KMP_DEBUG_ASSERT( __kmp_init_serial );
2565
2566 if (new_nth < 1)
2567 new_nth = 1;
2568 else if (new_nth > __kmp_max_nth)
2569 new_nth = __kmp_max_nth;
2570
Jonathan Peyton45be4502015-08-11 21:36:41 +00002571 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002572 thread = __kmp_threads[gtid];
2573
2574 __kmp_save_internal_controls( thread );
2575
2576 set__nproc( thread, new_nth );
2577
2578 //
2579 // If this omp_set_num_threads() call will cause the hot team size to be
2580 // reduced (in the absence of a num_threads clause), then reduce it now,
2581 // rather than waiting for the next parallel region.
2582 //
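    // Illustrative scenario (numbers not from the source): if the hot team currently
    // holds 8 threads and new_nth is 4, the last 4 workers are released right here and
    // t_nproc drops to 4 instead of waiting for the next parallel region.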
2583 root = thread->th.th_root;
2584 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002585 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2586#if KMP_NESTED_HOT_TEAMS
2587 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2588#endif
2589 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002590 kmp_team_t *hot_team = root->r.r_hot_team;
2591 int f;
2592
2593 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2594
Jim Cownie5e8470a2013-09-27 10:38:44 +00002595 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002596 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2597 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002598 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2599 // When decreasing team size, threads no longer in the team should unref task team.
2600 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2601 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002602 __kmp_free_thread( hot_team->t.t_threads[f] );
2603 hot_team->t.t_threads[f] = NULL;
2604 }
2605 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002606#if KMP_NESTED_HOT_TEAMS
2607 if( thread->th.th_hot_teams ) {
2608 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2609 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2610 }
2611#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002612
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2614
2615 //
2616 // Update the t_nproc field in the threads that are still active.
2617 //
2618 for( f=0 ; f < new_nth; f++ ) {
2619 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2620 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2621 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002622 // Special flag in case omp_set_num_threads() call
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002623 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002624 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002625}
2626
Jim Cownie5e8470a2013-09-27 10:38:44 +00002627/* Changes max_active_levels */
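// Backs omp_set_max_active_levels() (presumably reached through that API entry point):
// negative values are ignored with a warning, values above KMP_MAX_ACTIVE_LEVELS_LIMIT
// are clamped to the limit, and the accepted value is stored in the thread's ICVs.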
2628void
2629__kmp_set_max_active_levels( int gtid, int max_active_levels )
2630{
2631 kmp_info_t *thread;
2632
2633 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2634 KMP_DEBUG_ASSERT( __kmp_init_serial );
2635
2636 // validate max_active_levels
2637 if( max_active_levels < 0 ) {
2638 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2639 // We ignore this call if the user has specified a negative value.
2640 // The current setting won't be changed. The last valid setting will be used.
2641 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2642 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2643 return;
2644 }
2645 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2646 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2647 // We allow a zero value. (implementation defined behavior)
2648 } else {
2649 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2650 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2651 // Current upper limit is MAX_INT. (implementation defined behavior)
2652 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
2653        // Actually, the flow should never get here as long as the limit is MAX_INT.
2654 }
2655 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2656
2657 thread = __kmp_threads[ gtid ];
2658
2659 __kmp_save_internal_controls( thread );
2660
2661 set__max_active_levels( thread, max_active_levels );
2662
2663}
2664
2665/* Gets max_active_levels */
2666int
2667__kmp_get_max_active_levels( int gtid )
2668{
2669 kmp_info_t *thread;
2670
2671 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2672 KMP_DEBUG_ASSERT( __kmp_init_serial );
2673
2674 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002675 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002676 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002677 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2678 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002679}
2680
2681/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
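// Rough sketch of the translation done below (illustrative): a request such as
// (kmp_sched_dynamic, 4) is translated through __kmp_sch_map[] to the corresponding
// internal scheduling kind with chunk 4; kmp_sched_auto ignores the chunk argument and
// stores KMP_DEFAULT_CHUNK instead; an out-of-range kind falls back to the default
// schedule with a warning.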
2682void
2683__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2684{
2685 kmp_info_t *thread;
2686// kmp_team_t *team;
2687
2688 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2689 KMP_DEBUG_ASSERT( __kmp_init_serial );
2690
2691 // Check if the kind parameter is valid, correct if needed.
2692 // Valid parameters should fit in one of two intervals - standard or extended:
2693 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2694 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2695 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2696 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2697 {
2698 // TODO: Hint needs attention in case we change the default schedule.
2699 __kmp_msg(
2700 kmp_ms_warning,
2701 KMP_MSG( ScheduleKindOutOfRange, kind ),
2702 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2703 __kmp_msg_null
2704 );
2705 kind = kmp_sched_default;
2706 chunk = 0; // ignore chunk value in case of bad kind
2707 }
2708
2709 thread = __kmp_threads[ gtid ];
2710
2711 __kmp_save_internal_controls( thread );
2712
2713 if ( kind < kmp_sched_upper_std ) {
2714 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2715            // differentiate static chunked vs. unchunked:
2716 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002717 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002718 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002719 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720 }
2721 } else {
2722 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002723 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002724 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2725 }
2726 if ( kind == kmp_sched_auto ) {
2727 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002728 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002729 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002730 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731 }
2732}
2733
2734/* Gets def_sched_var ICV values */
2735void
2736__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2737{
2738 kmp_info_t *thread;
2739 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002740
2741 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2742 KMP_DEBUG_ASSERT( __kmp_init_serial );
2743
2744 thread = __kmp_threads[ gtid ];
2745
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002746 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002747
2748 switch ( th_type ) {
2749 case kmp_sch_static:
2750 case kmp_sch_static_greedy:
2751 case kmp_sch_static_balanced:
2752 *kind = kmp_sched_static;
2753 *chunk = 0; // chunk was not set, try to show this fact via zero value
2754 return;
2755 case kmp_sch_static_chunked:
2756 *kind = kmp_sched_static;
2757 break;
2758 case kmp_sch_dynamic_chunked:
2759 *kind = kmp_sched_dynamic;
2760 break;
2761 case kmp_sch_guided_chunked:
2762 case kmp_sch_guided_iterative_chunked:
2763 case kmp_sch_guided_analytical_chunked:
2764 *kind = kmp_sched_guided;
2765 break;
2766 case kmp_sch_auto:
2767 *kind = kmp_sched_auto;
2768 break;
2769 case kmp_sch_trapezoidal:
2770 *kind = kmp_sched_trapezoidal;
2771 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002772#if KMP_STATIC_STEAL_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002773 case kmp_sch_static_steal:
2774 *kind = kmp_sched_static_steal;
2775 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002776#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002777 default:
2778 KMP_FATAL( UnknownSchedulingType, th_type );
2779 }
2780
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002781 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002782}
2783
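// Returns the thread number of this thread's ancestor at the given nesting level, in
// the style of omp_get_ancestor_thread_num() (presumably its backing routine): level 0
// yields 0, a negative or too-deep level yields -1, and the current level yields the
// caller's own thread number.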
2784int
2785__kmp_get_ancestor_thread_num( int gtid, int level ) {
2786
2787 int ii, dd;
2788 kmp_team_t *team;
2789 kmp_info_t *thr;
2790
2791 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2792 KMP_DEBUG_ASSERT( __kmp_init_serial );
2793
2794 // validate level
2795 if( level == 0 ) return 0;
2796 if( level < 0 ) return -1;
2797 thr = __kmp_threads[ gtid ];
2798 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002799 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002800 if( level > ii ) return -1;
2801
2802#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002803 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002804 // AC: we are in teams region where multiple nested teams have same level
2805 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2806 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2807 KMP_DEBUG_ASSERT( ii >= tlevel );
2808 // AC: As we need to pass by the teams league, we need to artificially increase ii
2809 if ( ii == tlevel ) {
2810 ii += 2; // three teams have same level
2811 } else {
2812 ii ++; // two teams have same level
2813 }
2814 }
2815 }
2816#endif
2817
2818 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2819
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002820 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002821 level++;
2822 while( ii > level )
2823 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002824 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002825 {
2826 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002827 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002828 team = team->t.t_parent;
2829 continue;
2830 }
2831 if( ii > level ) {
2832 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002833 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002834 ii--;
2835 }
2836 }
2837
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002838 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002839}
2840
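// Companion to the routine above, in the style of omp_get_team_size(): returns the
// team size at the requested ancestor level (1 for level 0, -1 for an invalid level),
// walking parent teams and skipping serialized levels the same way.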
2841int
2842__kmp_get_team_size( int gtid, int level ) {
2843
2844 int ii, dd;
2845 kmp_team_t *team;
2846 kmp_info_t *thr;
2847
2848 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2849 KMP_DEBUG_ASSERT( __kmp_init_serial );
2850
2851 // validate level
2852 if( level == 0 ) return 1;
2853 if( level < 0 ) return -1;
2854 thr = __kmp_threads[ gtid ];
2855 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002856 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002857 if( level > ii ) return -1;
2858
2859#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002860 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002861 // AC: we are in teams region where multiple nested teams have same level
2862 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2863 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2864 KMP_DEBUG_ASSERT( ii >= tlevel );
2865 // AC: As we need to pass by the teams league, we need to artificially increase ii
2866 if ( ii == tlevel ) {
2867 ii += 2; // three teams have same level
2868 } else {
2869 ii ++; // two teams have same level
2870 }
2871 }
2872 }
2873#endif
2874
2875 while( ii > level )
2876 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002877 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002878 {
2879 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002880 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002881 team = team->t.t_parent;
2882 continue;
2883 }
2884 if( ii > level ) {
2885 team = team->t.t_parent;
2886 ii--;
2887 }
2888 }
2889
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002890 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002891}
2892
Jim Cownie5e8470a2013-09-27 10:38:44 +00002893kmp_r_sched_t
2894__kmp_get_schedule_global() {
2895// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
2896// may be changed by kmp_set_defaults independently, so the updated schedule can be obtained here.
2897
2898 kmp_r_sched_t r_sched;
2899
2900 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2901 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2902 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2903 if ( __kmp_sched == kmp_sch_static ) {
2904 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2905 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2906 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2907 } else {
2908 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2909 }
2910
2911 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2912 r_sched.chunk = KMP_DEFAULT_CHUNK;
2913 } else {
2914 r_sched.chunk = __kmp_chunk;
2915 }
2916
2917 return r_sched;
2918}
2919
2920/* ------------------------------------------------------------------------ */
2921/* ------------------------------------------------------------------------ */
2922
2923
2924/*
2925 * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
2926 * at least argc number of *t_argv entries for the requested team.
2927 */
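// Small argument counts (argc <= KMP_INLINE_ARGV_ENTRIES) reuse the inline
// t_inline_argv space inside the team structure; larger counts switch to a
// page-allocated heap array of at least KMP_MIN_MALLOC_ARGV_ENTRIES (or 2 * argc,
// whichever is larger) entries.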
2928static void
2929__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2930{
2931
2932 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002933 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002934
2935 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2936 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002937 /* if previously allocated heap space for args, free them */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002938 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2939 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002940
2941 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2942 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002943 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002944 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2945 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002946 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002947 if ( __kmp_storage_map ) {
2948 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2949 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2950 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2951 "team_%d.t_inline_argv",
2952 team->t.t_id );
2953 }
2954 } else {
2955 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002956 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002957 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2958 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2959 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002960 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002961 if ( __kmp_storage_map ) {
2962 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2963 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2964 team->t.t_id );
2965 }
2966 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002967 }
2968}
2969
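// Allocates the per-team arrays (thread pointers, shared dispatch buffers, per-thread
// dispatch and implicit-task data) sized for max_nth threads; the number of dispatch
// buffers is __kmp_dispatch_num_buffers for real teams and 2 for the single-thread case.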
2970static void
2971__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2972{
2973 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002974 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002975 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2976 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002977 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002978 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002979 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002980 team->t.t_max_nproc = max_nth;
2981
2982 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002983 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002984 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002985#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00002986 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2987#endif
2988 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002989}
2990
2991static void
2992__kmp_free_team_arrays(kmp_team_t *team) {
2993 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2994 int i;
2995 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2996 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2997 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2998 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2999 }; // if
3000 }; // for
3001 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00003002 __kmp_free(team->t.t_disp_buffer);
3003 __kmp_free(team->t.t_dispatch);
3004 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003005 team->t.t_threads = NULL;
3006 team->t.t_disp_buffer = NULL;
3007 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003008 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003009}
3010
3011static void
3012__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3013 kmp_info_t **oldThreads = team->t.t_threads;
3014
Jonathan Peytona58563d2016-03-29 20:05:27 +00003015 __kmp_free(team->t.t_disp_buffer);
3016 __kmp_free(team->t.t_dispatch);
3017 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003018 __kmp_allocate_team_arrays(team, max_nth);
3019
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003020 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003021
3022 __kmp_free(oldThreads);
3023}
3024
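// Builds the initial internal-control (ICV) set for a new root from the global
// settings captured at startup: nesting, dynamic adjustment, blocktime, default team
// size, max active levels, run-time schedule and, when OMP 4.0 is enabled, the
// proc-bind policy and default device.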
3025static kmp_internal_control_t
3026__kmp_get_global_icvs( void ) {
3027
Jim Cownie5e8470a2013-09-27 10:38:44 +00003028 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003029
3030#if OMP_40_ENABLED
3031 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3032#endif /* OMP_40_ENABLED */
3033
3034 kmp_internal_control_t g_icvs = {
3035 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003036 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3037 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3038 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003039 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003040#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00003041 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003042#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003043 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3044 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003045 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3046 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003047#if OMP_40_ENABLED
3048 __kmp_nested_proc_bind.bind_types[0],
George Rokos28f31b42016-09-09 17:55:26 +00003049 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003050#endif /* OMP_40_ENABLED */
3051 NULL //struct kmp_internal_control *next;
3052 };
3053
3054 return g_icvs;
3055}
3056
3057static kmp_internal_control_t
3058__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3059
Jim Cownie5e8470a2013-09-27 10:38:44 +00003060 kmp_internal_control_t gx_icvs;
3061 gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
3062 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3063 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003064
3065 return gx_icvs;
3066}
3067
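// Sets up a freshly allocated root: a single-thread root team used while the root runs
// serially, plus a hot team (with room for up to 2 * __kmp_dflt_team_nth_ub threads)
// that is kept around and reused across parallel regions.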
3068static void
3069__kmp_initialize_root( kmp_root_t *root )
3070{
3071 int f;
3072 kmp_team_t *root_team;
3073 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003074 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003075 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3076 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003077 KMP_DEBUG_ASSERT( root );
3078 KMP_ASSERT( ! root->r.r_begin );
3079
3080 /* setup the root state structure */
3081 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003082 root->r.r_begin = FALSE;
3083 root->r.r_active = FALSE;
3084 root->r.r_in_parallel = 0;
3085 root->r.r_blocktime = __kmp_dflt_blocktime;
3086 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003087
3088 /* setup the root team for this task */
3089 /* allocate the root team structure */
3090 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003091
Jim Cownie5e8470a2013-09-27 10:38:44 +00003092 root_team =
3093 __kmp_allocate_team(
3094 root,
3095 1, // new_nproc
3096 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003097#if OMPT_SUPPORT
3098 0, // root parallel id
3099#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003100#if OMP_40_ENABLED
3101 __kmp_nested_proc_bind.bind_types[0],
3102#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003103 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003104 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003105 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003106 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003107#if USE_DEBUGGER
3108 // Non-NULL value should be assigned to make the debugger display the root team.
3109 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3110#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003111
3112 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3113
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003114 root->r.r_root_team = root_team;
3115 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003116
3117 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003118 root_team->t.t_threads[0] = NULL;
3119 root_team->t.t_nproc = 1;
3120 root_team->t.t_serialized = 1;
3121 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3122 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3123 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003124 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3125 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3126
3127 /* setup the hot team for this task */
3128 /* allocate the hot team structure */
3129 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003130
Jim Cownie5e8470a2013-09-27 10:38:44 +00003131 hot_team =
3132 __kmp_allocate_team(
3133 root,
3134 1, // new_nproc
3135 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003136#if OMPT_SUPPORT
3137 0, // root parallel id
3138#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003139#if OMP_40_ENABLED
3140 __kmp_nested_proc_bind.bind_types[0],
3141#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003142 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003144 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003145 );
3146 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3147
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003148 root->r.r_hot_team = hot_team;
3149 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003150
3151 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003152 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003153
3154 /* initialize hot team */
3155 hot_team_max_nth = hot_team->t.t_max_nproc;
3156 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003157 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003158 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003159 hot_team->t.t_nproc = 1;
3160 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3161 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3162 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003163 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003164}
3165
3166#ifdef KMP_DEBUG
3167
3168
3169typedef struct kmp_team_list_item {
3170 kmp_team_p const * entry;
3171 struct kmp_team_list_item * next;
3172} kmp_team_list_item_t;
3173typedef kmp_team_list_item_t * kmp_team_list_t;
3174
3175
3176static void
3177__kmp_print_structure_team_accum( // Add team to list of teams.
3178 kmp_team_list_t list, // List of teams.
3179 kmp_team_p const * team // Team to add.
3180) {
3181
3182 // List must terminate with item where both entry and next are NULL.
3183 // Team is added to the list only once.
3184 // List is sorted in ascending order by team id.
3185 // Team id is *not* a key.
3186
3187 kmp_team_list_t l;
3188
3189 KMP_DEBUG_ASSERT( list != NULL );
3190 if ( team == NULL ) {
3191 return;
3192 }; // if
3193
3194 __kmp_print_structure_team_accum( list, team->t.t_parent );
3195 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3196
3197 // Search list for the team.
3198 l = list;
3199 while ( l->next != NULL && l->entry != team ) {
3200 l = l->next;
3201 }; // while
3202 if ( l->next != NULL ) {
3203 return; // Team has been added before, exit.
3204 }; // if
3205
3206 // Team is not found. Search list again for insertion point.
3207 l = list;
3208 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3209 l = l->next;
3210 }; // while
3211
3212 // Insert team.
3213 {
3214 kmp_team_list_item_t * item =
3215 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3216 * item = * l;
3217 l->entry = team;
3218 l->next = item;
3219 }
3220
3221}
3222
3223static void
3224__kmp_print_structure_team(
3225 char const * title,
3226 kmp_team_p const * team
3227
3228) {
3229 __kmp_printf( "%s", title );
3230 if ( team != NULL ) {
3231 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3232 } else {
3233 __kmp_printf( " - (nil)\n" );
3234 }; // if
3235}
3236
3237static void
3238__kmp_print_structure_thread(
3239 char const * title,
3240 kmp_info_p const * thread
3241
3242) {
3243 __kmp_printf( "%s", title );
3244 if ( thread != NULL ) {
3245 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3246 } else {
3247 __kmp_printf( " - (nil)\n" );
3248 }; // if
3249}
3250
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003251void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003252__kmp_print_structure(
3253 void
3254) {
3255
3256 kmp_team_list_t list;
3257
3258 // Initialize list of teams.
3259 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3260 list->entry = NULL;
3261 list->next = NULL;
3262
3263 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3264 {
3265 int gtid;
3266 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3267 __kmp_printf( "%2d", gtid );
3268 if ( __kmp_threads != NULL ) {
3269 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3270 }; // if
3271 if ( __kmp_root != NULL ) {
3272 __kmp_printf( " %p", __kmp_root[ gtid ] );
3273 }; // if
3274 __kmp_printf( "\n" );
3275 }; // for gtid
3276 }
3277
3278 // Print out __kmp_threads array.
3279 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3280 if ( __kmp_threads != NULL ) {
3281 int gtid;
3282 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3283 kmp_info_t const * thread = __kmp_threads[ gtid ];
3284 if ( thread != NULL ) {
3285 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3286 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3287 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3288 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3289 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3290 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3291 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3292 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3293#if OMP_40_ENABLED
3294 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3295#endif
3296 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3297 __kmp_printf( "\n" );
3298 __kmp_print_structure_team_accum( list, thread->th.th_team );
3299 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3300 }; // if
3301 }; // for gtid
3302 } else {
3303 __kmp_printf( "Threads array is not allocated.\n" );
3304 }; // if
3305
3306 // Print out __kmp_root array.
3307 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3308 if ( __kmp_root != NULL ) {
3309 int gtid;
3310 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3311 kmp_root_t const * root = __kmp_root[ gtid ];
3312 if ( root != NULL ) {
3313 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3314 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3315 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3316 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3317 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3318 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3319 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3320 __kmp_printf( "\n" );
3321 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3322 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3323 }; // if
3324 }; // for gtid
3325 } else {
3326 __kmp_printf( "Ubers array is not allocated.\n" );
3327 }; // if
3328
3329 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3330 while ( list->next != NULL ) {
3331 kmp_team_p const * team = list->entry;
3332 int i;
3333 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3334 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3335 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3336 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3337 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3338 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3339 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3340 __kmp_printf( " Thread %2d: ", i );
3341 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3342 }; // for i
3343 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3344 __kmp_printf( "\n" );
3345 list = list->next;
3346 }; // while
3347
3348 // Print out __kmp_thread_pool and __kmp_team_pool.
3349 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3350 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3351 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3352 __kmp_printf( "\n" );
3353
3354 // Free team list.
3355 while ( list != NULL ) {
3356 kmp_team_list_item_t * item = list;
3357 list = list->next;
3358 KMP_INTERNAL_FREE( item );
3359 }; // while
3360
3361}
3362
3363#endif
3364
3365
3366//---------------------------------------------------------------------------
3367// Stuff for per-thread fast random number generator
3368// Table of primes
3369
3370static const unsigned __kmp_primes[] = {
3371 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3372 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3373 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3374 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3375 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3376 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3377 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3378 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3379 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3380 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3381 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3382 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3383 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3384 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3385 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3386 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3387};
3388
3389//---------------------------------------------------------------------------
3390// __kmp_get_random: Get a random number using a linear congruential method.
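// The per-thread state follows x_{n+1} = a * x_n + 1 (mod 2^32), where the multiplier
// 'a' is picked from the primes table above by thread id; the high 16 bits of x are
// returned as the random value.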
3391
3392unsigned short
3393__kmp_get_random( kmp_info_t * thread )
3394{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003395 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003396 unsigned short r = x>>16;
3397
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003398 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003399
3400 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3401 thread->th.th_info.ds.ds_tid, r) );
3402
3403 return r;
3404}
3405//--------------------------------------------------------
3406// __kmp_init_random: Initialize a random number generator
3407
3408void
3409__kmp_init_random( kmp_info_t * thread )
3410{
3411 unsigned seed = thread->th.th_info.ds.ds_tid;
3412
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003413 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3414 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3415 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003416}
3417
3418
3419#if KMP_OS_WINDOWS
3420/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3421static int
3422__kmp_reclaim_dead_roots(void) {
3423 int i, r = 0;
3424
3425 for(i = 0; i < __kmp_threads_capacity; ++i) {
3426 if( KMP_UBER_GTID( i ) &&
3427 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3428 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
3429 r += __kmp_unregister_root_other_thread(i);
3430 }
3431 }
3432 return r;
3433}
3434#endif
3435
3436/*
3437 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3438 free entries generated.
3439
3440 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3441 already dead.
3442
3443 On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate
3444 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3445 __kmp_tp_capacity, if threadprivate cache array has been created.
3446 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3447
3448 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3449 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3450 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3451 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3452 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3453 as many free slots as possible up to nWish.
3454
3455 If any argument is negative, the behavior is undefined.
3456*/
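// Illustrative example (numbers not from the source): with __kmp_threads_capacity == 8,
// nWish == 4 and nNeed == 1, and with ample headroom, the doubling loop below grows the
// capacity to 16, which satisfies the nWish target in a single pass (adding 8 free slots).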
3457static int
3458__kmp_expand_threads(int nWish, int nNeed) {
3459 int added = 0;
3460 int old_tp_cached;
3461 int __kmp_actual_max_nth;
3462
3463 if(nNeed > nWish) /* normalize the arguments */
3464 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003465#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003466/* only for Windows static library */
3467 /* reclaim array entries for root threads that are already dead */
3468 added = __kmp_reclaim_dead_roots();
3469
3470 if(nNeed) {
3471 nNeed -= added;
3472 if(nNeed < 0)
3473 nNeed = 0;
3474 }
3475 if(nWish) {
3476 nWish -= added;
3477 if(nWish < 0)
3478 nWish = 0;
3479 }
3480#endif
3481 if(nWish <= 0)
3482 return added;
3483
3484 while(1) {
3485 int nTarget;
3486 int minimumRequiredCapacity;
3487 int newCapacity;
3488 kmp_info_t **newThreads;
3489 kmp_root_t **newRoot;
3490
3491 //
3492 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3493 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3494 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3495 // become > __kmp_max_nth in one of two ways:
3496 //
3497 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3498        // may not be reused by another thread, so we may need to increase
3499 // __kmp_threads_capacity to __kmp_max_threads + 1.
3500 //
3501 // 2) New foreign root(s) are encountered. We always register new
3502 // foreign roots. This may cause a smaller # of threads to be
3503 // allocated at subsequent parallel regions, but the worker threads
3504 // hang around (and eventually go to sleep) and need slots in the
3505 // __kmp_threads[] array.
3506 //
3507 // Anyway, that is the reason for moving the check to see if
3508        // __kmp_max_threads was exceeded into __kmp_reserve_threads()
3509 // instead of having it performed here. -BB
3510 //
3511 old_tp_cached = __kmp_tp_cached;
3512 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3513 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3514
3515 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3516 nTarget = nWish;
3517 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3518 /* can't fulfil nWish, so try nNeed */
3519 if(nNeed) {
3520 nTarget = nNeed;
3521 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3522 /* possible expansion too small -- give up */
3523 break;
3524 }
3525 } else {
3526 /* best-effort */
3527 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3528 if(!nTarget) {
3529                /* can't expand at all -- give up */
3530 break;
3531 }
3532 }
3533 }
3534 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3535
3536 newCapacity = __kmp_threads_capacity;
3537 do{
3538 newCapacity =
3539 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3540 (newCapacity << 1) :
3541 __kmp_actual_max_nth;
3542 } while(newCapacity < minimumRequiredCapacity);
3543 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3544 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003545 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3546 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003547 memset(newThreads + __kmp_threads_capacity, 0,
3548 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3549 memset(newRoot + __kmp_threads_capacity, 0,
3550 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3551
3552 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3553 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3554 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3555 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3556 of a double-check pair.
3557 */
3558 __kmp_free(newThreads);
3559 continue; /* start over and try again */
3560 }
3561 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3562 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3563 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3564 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3565 __kmp_free(newThreads);
3566 continue; /* start over and try again */
3567 } else {
3568 /* success */
3569            // __kmp_free( __kmp_threads ); // ATT: It leads to a crash. Needs to be investigated.
3570 //
3571 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3572 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3573 added += newCapacity - __kmp_threads_capacity;
3574 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3575 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003576 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003577 }
3578 }
3579 return added;
3580}
3581
3582/* register the current thread as a root thread and obtain our gtid */
3583/* we must have the __kmp_initz_lock held at this point */
3584/* Argument TRUE only if we are the thread that calls from __kmp_do_serial_initialize() */
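/* On success the new root gets: a slot in __kmp_threads/__kmp_root (expanding the arrays
   if needed), an initialized root structure with root and hot teams, a reserve serial
   team, per-barrier state, and -- where affinity is supported -- an initial affinity mask. */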
3585int
3586__kmp_register_root( int initial_thread )
3587{
3588 kmp_info_t *root_thread;
3589 kmp_root_t *root;
3590 int gtid;
3591 int capacity;
3592 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3593 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3594 KMP_MB();
3595
3596
3597 /*
3598 2007-03-02:
3599
3600        If the initial thread did not invoke the OpenMP RTL yet, and this thread is not an initial one,
3601        the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
3602        return false (that means there is at least one empty slot in the __kmp_threads array), but it
3603        is possible the only free slot is #0, which is reserved for the initial thread and so cannot be
3604        used for this one. The following code works around this bug.
3605
3606        However, the right solution seems to be not reserving slot #0 for the initial thread because:
3607 (1) there is no magic in slot #0,
3608            (2) we cannot detect the initial thread reliably (the first thread which does serial
3609                initialization may not be a real initial thread).
3610 */
3611 capacity = __kmp_threads_capacity;
3612 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3613 -- capacity;
3614 }; // if
3615
3616 /* see if there are too many threads */
3617 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3618 if ( __kmp_tp_cached ) {
3619 __kmp_msg(
3620 kmp_ms_fatal,
3621 KMP_MSG( CantRegisterNewThread ),
3622 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3623 KMP_HNT( PossibleSystemLimitOnThreads ),
3624 __kmp_msg_null
3625 );
3626 }
3627 else {
3628 __kmp_msg(
3629 kmp_ms_fatal,
3630 KMP_MSG( CantRegisterNewThread ),
3631 KMP_HNT( SystemLimitOnThreads ),
3632 __kmp_msg_null
3633 );
3634 }
3635 }; // if
3636
3637 /* find an available thread slot */
3638 /* Don't reassign the zero slot since we need that to only be used by initial
3639 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003640 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3641 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003642 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3643 KMP_ASSERT( gtid < __kmp_threads_capacity );
3644
3645 /* update global accounting */
3646 __kmp_all_nth ++;
3647 TCW_4(__kmp_nth, __kmp_nth + 1);
3648
3649 //
3650 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3651 // for low numbers of procs, and method #2 (keyed API call) for higher
3652 // numbers of procs.
3653 //
3654 if ( __kmp_adjust_gtid_mode ) {
3655 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3656 if ( TCR_4(__kmp_gtid_mode) != 2) {
3657 TCW_4(__kmp_gtid_mode, 2);
3658 }
3659 }
3660 else {
3661 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3662 TCW_4(__kmp_gtid_mode, 1);
3663 }
3664 }
3665 }
3666
3667#ifdef KMP_ADJUST_BLOCKTIME
3668 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003669 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003670 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3671 if ( __kmp_nth > __kmp_avail_proc ) {
3672 __kmp_zero_bt = TRUE;
3673 }
3674 }
3675#endif /* KMP_ADJUST_BLOCKTIME */
3676
3677 /* setup this new hierarchy */
3678 if( ! ( root = __kmp_root[gtid] )) {
3679 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3680 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3681 }
3682
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003683#if KMP_STATS_ENABLED
3684 // Initialize stats as soon as possible (right after gtid assignment).
3685 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3686 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3687 KMP_SET_THREAD_STATE(SERIAL_REGION);
3688 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3689#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003690 __kmp_initialize_root( root );
3691
3692 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003693 if( root->r.r_uber_thread ) {
3694 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003695 } else {
3696 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3697 if ( __kmp_storage_map ) {
3698 __kmp_print_thread_storage_map( root_thread, gtid );
3699 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003700 root_thread->th.th_info .ds.ds_gtid = gtid;
3701 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003702 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003703 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003704 }
3705 #if USE_FAST_MEMORY
3706 __kmp_initialize_fast_memory( root_thread );
3707 #endif /* USE_FAST_MEMORY */
3708
3709 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003710 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711 __kmp_initialize_bget( root_thread );
3712 #endif
3713 __kmp_init_random( root_thread ); // Initialize random number generator
3714 }
3715
3716 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003717 if( ! root_thread->th.th_serial_team ) {
3718 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003719 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003720
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003721 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003722#if OMPT_SUPPORT
3723 0, // root parallel id
3724#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003725#if OMP_40_ENABLED
3726 proc_bind_default,
3727#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003728 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003729 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003730 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003731 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003732 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003733 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003734
3735 /* drop root_thread into place */
3736 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3737
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003738 root->r.r_root_team->t.t_threads[0] = root_thread;
3739 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3740 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
 3741        root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
3742 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003743
3744 /* initialize the thread, get it ready to go */
3745 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003746 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003747
3748 /* prepare the master thread for get_gtid() */
3749 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003750
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003751#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003752 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003753#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003754
Jim Cownie5e8470a2013-09-27 10:38:44 +00003755 #ifdef KMP_TDATA_GTID
3756 __kmp_gtid = gtid;
3757 #endif
3758 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3759 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003760
3761 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3762 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003763 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003764 KMP_INIT_BARRIER_STATE ) );
3765 { // Initialize barrier data.
3766 int b;
3767 for ( b = 0; b < bs_last_barrier; ++ b ) {
3768 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003769#if USE_DEBUGGER
3770 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3771#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003772 }; // for
3773 }
3774 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3775
Alp Toker763b9392014-02-28 09:42:41 +00003776#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003777# if OMP_40_ENABLED
3778 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3779 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3780 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3781 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3782# endif
3783
Jim Cownie5e8470a2013-09-27 10:38:44 +00003784 if ( TCR_4(__kmp_init_middle) ) {
3785 __kmp_affinity_set_init_mask( gtid, TRUE );
3786 }
Alp Toker763b9392014-02-28 09:42:41 +00003787#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003788
3789 __kmp_root_counter ++;
3790
3791 KMP_MB();
3792 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3793
3794 return gtid;
3795}
3796
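// The KMP_ADJUST_BLOCKTIME block at the top of __kmp_register_root (and again in
// __kmp_allocate_thread below) zeroes the spin blocktime once the runtime holds more
// threads than available processors, unless the user set the blocktime explicitly.
// The following is a minimal standalone sketch of that decision rule only; the helper
// name and parameters are illustrative stand-ins for __kmp_env_blocktime, __kmp_nth
// and __kmp_avail_proc, not runtime symbols.
#if 0
static int sketch_should_zero_blocktime( int blocktime_set_by_user, int nth, int avail_proc )
{
    if ( !blocktime_set_by_user && avail_proc > 0 && nth > avail_proc ) {
        return 1;   // oversubscribed: stop spinning so the CPU is released quickly
    }
    return 0;       // otherwise keep the configured blocktime
}
#endif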
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003797#if KMP_NESTED_HOT_TEAMS
3798static int
3799__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3800{
3801 int i, n, nth;
3802 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3803 if( !hot_teams || !hot_teams[level].hot_team ) {
3804 return 0;
3805 }
3806 KMP_DEBUG_ASSERT( level < max_level );
3807 kmp_team_t *team = hot_teams[level].hot_team;
3808 nth = hot_teams[level].hot_team_nth;
3809 n = nth - 1; // master is not freed
3810 if( level < max_level - 1 ) {
3811 for( i = 0; i < nth; ++i ) {
3812 kmp_info_t *th = team->t.t_threads[i];
3813 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3814 if( i > 0 && th->th.th_hot_teams ) {
3815 __kmp_free( th->th.th_hot_teams );
3816 th->th.th_hot_teams = NULL;
3817 }
3818 }
3819 }
3820 __kmp_free_team( root, team, NULL );
3821 return n;
3822}
3823#endif
3824
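// __kmp_free_hot_teams above walks the nested hot-team tree depth-first, freeing each
// level's team and returning how many worker entries (masters excluded) were released.
// Below is a rough standalone sketch of the same traversal over a simplified tree; the
// node type and field names are invented for illustration and are not the runtime's
// kmp_hot_team_ptr_t / kmp_team_t structures.
#if 0
struct sketch_team {
    int nth;                         // threads in this team (index 0 is the master)
    struct sketch_team **children;   // one nested team per thread, entries may be NULL
};

// Returns the number of worker entries released below (and including) 'team',
// mirroring the "n = nth - 1, then recurse into each thread" shape above.
static int sketch_free_nested( struct sketch_team *team )
{
    if ( team == NULL )
        return 0;
    int n = team->nth - 1;           // the master is not counted or freed here
    if ( team->children != NULL ) {
        for ( int i = 0; i < team->nth; ++i )
            n += sketch_free_nested( team->children[i] );
    }
    /* free( team ); */              // resource release elided in the sketch
    return n;
}
#endif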
Jim Cownie5e8470a2013-09-27 10:38:44 +00003825/* Resets a root thread and clears its root and hot teams.
3826 Returns the number of __kmp_threads entries directly and indirectly freed.
3827*/
3828static int
3829__kmp_reset_root(int gtid, kmp_root_t *root)
3830{
3831 kmp_team_t * root_team = root->r.r_root_team;
3832 kmp_team_t * hot_team = root->r.r_hot_team;
3833 int n = hot_team->t.t_nproc;
3834 int i;
3835
3836 KMP_DEBUG_ASSERT( ! root->r.r_active );
3837
3838 root->r.r_root_team = NULL;
3839 root->r.r_hot_team = NULL;
3840 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call
3841 // to __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003842 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3843#if KMP_NESTED_HOT_TEAMS
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003844 if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003845 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3846 kmp_info_t *th = hot_team->t.t_threads[i];
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003847 if( __kmp_hot_teams_max_level > 1 ) {
3848 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3849 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003850 if( th->th.th_hot_teams ) {
3851 __kmp_free( th->th.th_hot_teams );
3852 th->th.th_hot_teams = NULL;
3853 }
3854 }
3855 }
3856#endif
3857 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003858
Jim Cownie5e8470a2013-09-27 10:38:44 +00003859 //
3860 // Before we can reap the thread, we need to make certain that all
3861 // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
3862 //
3863 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3864 __kmp_wait_to_unref_task_teams();
3865 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003866
3867 #if KMP_OS_WINDOWS
3868 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3869 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3870 (LPVOID)&(root->r.r_uber_thread->th),
3871 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3872 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3873 #endif /* KMP_OS_WINDOWS */
3874
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003875#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003876 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003877 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3878 int gtid = __kmp_get_gtid();
3879 __ompt_thread_end(ompt_thread_initial, gtid);
3880 }
3881#endif
3882
Jim Cownie5e8470a2013-09-27 10:38:44 +00003883 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3884 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3885
 3886    // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3887 root->r.r_uber_thread = NULL;
3888 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003889 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003890
3891 return n;
3892}
3893
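// __kmp_reset_root clears r_root_team / r_hot_team before calling __kmp_free_team,
// because the free routine deliberately skips whatever is still published as the hot
// team. A tiny standalone sketch of that "unpublish, then release" ordering follows;
// the types and helper names are invented for illustration.
#if 0
struct sketch_root { void *hot_team; };

static void sketch_free_team( struct sketch_root *root, void *team )
{
    if ( team == root->hot_team )
        return;                         // still published as the hot team: keep it
    /* free( team ); */
}

static void sketch_reset_root( struct sketch_root *root )
{
    void *hot = root->hot_team;
    root->hot_team = NULL;              // unpublish first ...
    sketch_free_team( root, hot );      // ... so the release actually happens
}
#endif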
3894void
3895__kmp_unregister_root_current_thread( int gtid )
3896{
Jim Cownie77c2a632014-09-03 11:34:33 +00003897 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003898 /* this lock should be ok, since unregister_root_current_thread is never called during
 3899     * an abort, only during a normal close. Furthermore, if you have the
3900 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003901
3902 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3903 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3904 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3905 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3906 return;
3907 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003908 kmp_root_t *root = __kmp_root[gtid];
3909
Jim Cownie5e8470a2013-09-27 10:38:44 +00003910 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3911 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3912 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3913 KMP_ASSERT( root->r.r_active == FALSE );
3914
Jim Cownie5e8470a2013-09-27 10:38:44 +00003915
3916 KMP_MB();
3917
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003918#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003919 kmp_info_t * thread = __kmp_threads[gtid];
3920 kmp_team_t * team = thread->th.th_team;
3921 kmp_task_team_t * task_team = thread->th.th_task_team;
3922
3923 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003924 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3925#if OMPT_SUPPORT
3926 // the runtime is shutting down so we won't report any events
3927 thread->th.ompt_thread_info.state = ompt_state_undefined;
3928#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003929 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003930 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003931#endif
3932
Jim Cownie5e8470a2013-09-27 10:38:44 +00003933 __kmp_reset_root(gtid, root);
3934
3935 /* free up this thread slot */
3936 __kmp_gtid_set_specific( KMP_GTID_DNE );
3937#ifdef KMP_TDATA_GTID
3938 __kmp_gtid = KMP_GTID_DNE;
3939#endif
3940
3941 KMP_MB();
3942 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3943
3944 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3945}
3946
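// The OMP_45_ENABLED block above drains outstanding proxy tasks before the root thread
// is torn down, so nothing is left pushing work to a dying thread. The snippet below is
// only a rough standalone illustration of that drain idea using a C11 atomic counter
// and sched_yield(); the counter and helper are hypothetical, not the runtime's
// task-team machinery.
#if 0
#include <stdatomic.h>
#include <sched.h>

static atomic_int sketch_outstanding_proxy_tasks;

static void sketch_wait_for_proxy_tasks( void )
{
    while ( atomic_load( &sketch_outstanding_proxy_tasks ) > 0 )
        sched_yield();                  // let producers/executors make progress
}
#endif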
Jonathan Peyton2321d572015-06-08 19:25:25 +00003947#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003948/* __kmp_forkjoin_lock must be already held
3949 Unregisters a root thread that is not the current thread. Returns the number of
3950 __kmp_threads entries freed as a result.
3951 */
3952static int
3953__kmp_unregister_root_other_thread( int gtid )
3954{
3955 kmp_root_t *root = __kmp_root[gtid];
3956 int r;
3957
3958 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3959 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3960 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3961 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3962 KMP_ASSERT( root->r.r_active == FALSE );
3963
3964 r = __kmp_reset_root(gtid, root);
3965 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3966 return r;
3967}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003968#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003969
Jim Cownie5e8470a2013-09-27 10:38:44 +00003970#if KMP_DEBUG
3971void __kmp_task_info() {
3972
3973 kmp_int32 gtid = __kmp_entry_gtid();
3974 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3975 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003976 kmp_team_t *steam = this_thr->th.th_serial_team;
3977 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003978
3979 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3980 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3981}
3982#endif // KMP_DEBUG
3983
Jim Cownie5e8470a2013-09-27 10:38:44 +00003984/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003985 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003986 * initialization of features as much as possible */
3987static void
3988__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3989{
3990 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3991 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003992 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003993 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003994 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003995 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003996 KMP_DEBUG_ASSERT( team->t.t_threads );
3997 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3998 KMP_DEBUG_ASSERT( master );
3999 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004000
4001 KMP_MB();
4002
4003 TCW_SYNC_PTR(this_thr->th.th_team, team);
4004
4005 this_thr->th.th_info.ds.ds_tid = tid;
4006 this_thr->th.th_set_nproc = 0;
Andrey Churbanov581490e2017-02-06 18:53:32 +00004007 if (__kmp_tasking_mode != tskm_immediate_exec)
4008 // When tasking is possible, threads are not safe to reap until they are
4009 // done tasking; this will be set when tasking code is exited in wait
4010 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4011 else // no tasking --> always safe to reap
4012 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004013#if OMP_40_ENABLED
4014 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00004015# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004016 this_thr->th.th_new_place = this_thr->th.th_current_place;
4017# endif
4018#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004019 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004020
4021 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004022 this_thr->th.th_team_nproc = team->t.t_nproc;
4023 this_thr->th.th_team_master = master;
4024 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004025 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4026
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004027 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004028
4029 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4030 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4031
4032 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4033
4034 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4035 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4036 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004037
4038 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004039 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004040
4041 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004042
4043#ifdef BUILD_TV
4044 this_thr->th.th_local.tv_data = 0;
4045#endif
4046
4047 if ( ! this_thr->th.th_pri_common ) {
4048 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4049 if ( __kmp_storage_map ) {
4050 __kmp_print_storage_map_gtid(
4051 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4052 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4053 );
4054 }; // if
4055 this_thr->th.th_pri_head = NULL;
4056 }; // if
4057
4058 /* Initialize dynamic dispatch */
4059 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004060 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004061 /*
4062 * Use team max_nproc since this will never change for the team.
4063 */
4064 size_t disp_size = sizeof( dispatch_private_info_t ) *
Jonathan Peyton067325f2016-05-31 19:01:15 +00004065 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004066 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4067 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004068 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004069 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4070
4071 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004072#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00004073 dispatch->th_doacross_buf_idx = 0;
4074#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004075 if( ! dispatch->th_disp_buffer ) {
4076 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004077
4078 if ( __kmp_storage_map ) {
4079 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
Jonathan Peyton067325f2016-05-31 19:01:15 +00004080 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
Jim Cownie5e8470a2013-09-27 10:38:44 +00004081 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4082 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4083 gtid, team->t.t_id, gtid );
4084 }
4085 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004086 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004087 }
4088
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004089 dispatch->th_dispatch_pr_current = 0;
4090 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004091
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004092 dispatch->th_deo_fcn = 0; /* ORDERED */
4093 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004094 }
4095
4096 this_thr->th.th_next_pool = NULL;
4097
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004098 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004099 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004100 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4101 this_thr->th.th_task_state_top = 0;
4102 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004103 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4104 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004105 }
4106
Jim Cownie5e8470a2013-09-27 10:38:44 +00004107 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4108 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4109
4110 KMP_MB();
4111}
4112
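// The dynamic-dispatch setup in __kmp_initialize_info sizes th_disp_buffer to a single
// dispatch_private_info_t for a serial team (t_max_nproc == 1) and to
// __kmp_dispatch_num_buffers entries otherwise. A standalone sketch of just that sizing
// rule follows; the struct is a stand-in, not the real dispatch_private_info_t.
#if 0
#include <stddef.h>

typedef struct { char payload[64]; } sketch_disp_info_t;   // stand-in payload only

// Mirrors: disp_size = sizeof(dispatch_private_info_t) *
//          ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers )
static size_t sketch_disp_size( int team_max_nproc, int num_buffers )
{
    int nbuf = ( team_max_nproc == 1 ) ? 1 : num_buffers;
    return sizeof( sketch_disp_info_t ) * (size_t)nbuf;
}
#endif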
4113
4114/* allocate a new thread for the requesting team. this is only called from within a
4115 * forkjoin critical section. we will first try to get an available thread from the
4116 * thread pool. if none is available, we will fork a new one assuming we are able
4117 * to create a new one. this should be assured, as the caller should check on this
4118 * first.
4119 */
4120kmp_info_t *
4121__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4122{
4123 kmp_team_t *serial_team;
4124 kmp_info_t *new_thr;
4125 int new_gtid;
4126
4127 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4128 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004129#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004130 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004131#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004132 KMP_MB();
4133
4134 /* first, try to get one from the thread pool */
4135 if ( __kmp_thread_pool ) {
4136
4137 new_thr = (kmp_info_t*)__kmp_thread_pool;
4138 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4139 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4140 __kmp_thread_pool_insert_pt = NULL;
4141 }
4142 TCW_4(new_thr->th.th_in_pool, FALSE);
4143 //
4144 // Don't touch th_active_in_pool or th_active.
4145 // The worker thread adjusts those flags as it sleeps/awakens.
4146 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004147 __kmp_thread_pool_nth--;
4148
4149 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4150 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004151 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004152 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4153 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4154
4155 /* setup the thread structure */
4156 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4157 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4158
4159 TCW_4(__kmp_nth, __kmp_nth + 1);
4160
Jonathan Peyton54127982015-11-04 21:37:48 +00004161 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004162 new_thr->th.th_task_state_top = 0;
4163 new_thr->th.th_task_state_stack_sz = 4;
4164
Jim Cownie5e8470a2013-09-27 10:38:44 +00004165#ifdef KMP_ADJUST_BLOCKTIME
 4166        /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004167 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004168 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4169 if ( __kmp_nth > __kmp_avail_proc ) {
4170 __kmp_zero_bt = TRUE;
4171 }
4172 }
4173#endif /* KMP_ADJUST_BLOCKTIME */
4174
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004175#if KMP_DEBUG
4176 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
4177 int b;
4178 kmp_balign_t * balign = new_thr->th.th_bar;
4179 for( b = 0; b < bs_last_barrier; ++ b )
4180 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4181#endif
4182
Jim Cownie5e8470a2013-09-27 10:38:44 +00004183 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4184 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4185
4186 KMP_MB();
4187 return new_thr;
4188 }
4189
4190
 4191    /* no, we'll fork a new one */
4192 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4193 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4194
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004195#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00004196 //
4197 // If this is the first worker thread the RTL is creating, then also
4198 // launch the monitor thread. We try to do this as early as possible.
4199 //
4200 if ( ! TCR_4( __kmp_init_monitor ) ) {
4201 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4202 if ( ! TCR_4( __kmp_init_monitor ) ) {
4203 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4204 TCW_4( __kmp_init_monitor, 1 );
4205 __kmp_create_monitor( & __kmp_monitor );
4206 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004207 #if KMP_OS_WINDOWS
4208 // AC: wait until monitor has started. This is a fix for CQ232808.
4209 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
 4210            // work in between, then there is a high probability that the monitor thread starts after
 4211            // the library has shut down. At shutdown it is too late to cope with the problem, because
 4212            // when the master is in DllMain (process detach) the monitor has no chance to start
 4213            // (it is blocked), and the master has no means to inform the monitor that the library has gone,
4214 // because all the memory which the monitor can access is going to be released/reset.
4215 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4216 KMP_YIELD( TRUE );
4217 }
4218 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4219 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004220 }
4221 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4222 }
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004223#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004224
4225 KMP_MB();
4226 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4227 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4228 }
4229
4230 /* allocate space for it. */
4231 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4232
4233 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4234
4235 if ( __kmp_storage_map ) {
4236 __kmp_print_thread_storage_map( new_thr, new_gtid );
4237 }
4238
4239 /* add the reserve serialized team, initialized from the team's master thread */
4240 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004241 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004242 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004243
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004244 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004245 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004246#if OMPT_SUPPORT
4247 0, // root parallel id
4248#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004249#if OMP_40_ENABLED
4250 proc_bind_default,
4251#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004252 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004253 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004254 }
4255 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004256 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
4257 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004258 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4259 new_thr ) );
4260
4261 /* setup the thread structures */
4262 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4263
4264 #if USE_FAST_MEMORY
4265 __kmp_initialize_fast_memory( new_thr );
4266 #endif /* USE_FAST_MEMORY */
4267
4268 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004269 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004270 __kmp_initialize_bget( new_thr );
4271 #endif
4272
4273 __kmp_init_random( new_thr ); // Initialize random number generator
4274
4275 /* Initialize these only once when thread is grabbed for a team allocation */
4276 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4277 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4278
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004279 int b;
4280 kmp_balign_t * balign = new_thr->th.th_bar;
4281 for(b=0; b<bs_last_barrier; ++b) {
4282 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4283 balign[b].bb.team = NULL;
4284 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4285 balign[b].bb.use_oncore_barrier = 0;
4286 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004287
4288 new_thr->th.th_spin_here = FALSE;
4289 new_thr->th.th_next_waiting = 0;
4290
Alp Toker98758b02014-03-02 04:12:06 +00004291#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004292 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4293 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4294 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4295 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4296#endif
4297
4298 TCW_4(new_thr->th.th_in_pool, FALSE);
4299 new_thr->th.th_active_in_pool = FALSE;
4300 TCW_4(new_thr->th.th_active, TRUE);
4301
4302 /* adjust the global counters */
4303 __kmp_all_nth ++;
4304 __kmp_nth ++;
4305
4306 //
4307 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4308 // for low numbers of procs, and method #2 (keyed API call) for higher
4309 // numbers of procs.
4310 //
4311 if ( __kmp_adjust_gtid_mode ) {
4312 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4313 if ( TCR_4(__kmp_gtid_mode) != 2) {
4314 TCW_4(__kmp_gtid_mode, 2);
4315 }
4316 }
4317 else {
4318 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4319 TCW_4(__kmp_gtid_mode, 1);
4320 }
4321 }
4322 }
4323
4324#ifdef KMP_ADJUST_BLOCKTIME
4325 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004326 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004327 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4328 if ( __kmp_nth > __kmp_avail_proc ) {
4329 __kmp_zero_bt = TRUE;
4330 }
4331 }
4332#endif /* KMP_ADJUST_BLOCKTIME */
4333
4334 /* actually fork it and create the new worker thread */
4335 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4336 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4337 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4338
Jim Cownie5e8470a2013-09-27 10:38:44 +00004339 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4340 KMP_MB();
4341 return new_thr;
4342}
4343
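// __kmp_allocate_thread above reuses a parked worker from the global thread pool when
// one is available and only forks a new OS thread, after scanning __kmp_threads for the
// first free gtid slot, when the pool is empty. The C++ sketch below shows only that
// pool-first policy with invented containers; it is not the runtime's intrusive pool.
#if 0
#include <vector>
#include <cstddef>

struct sketch_thread { int gtid; };

static std::vector<sketch_thread*> sketch_pool;            // stands in for __kmp_thread_pool
static std::vector<sketch_thread*> sketch_threads( 64 );   // stands in for __kmp_threads[]

static sketch_thread *sketch_allocate_thread()
{
    if ( !sketch_pool.empty() ) {                // 1) reuse a parked worker if possible
        sketch_thread *t = sketch_pool.back();
        sketch_pool.pop_back();
        return t;
    }
    std::size_t gtid = 1;                        // 2) otherwise find the first free slot
    while ( gtid < sketch_threads.size() && sketch_threads[gtid] != nullptr )
        ++gtid;
    if ( gtid == sketch_threads.size() )
        sketch_threads.resize( gtid + 1, nullptr );      // grow the table in the sketch
    sketch_thread *t = new sketch_thread{ (int)gtid };   // 3) "fork" a fresh worker
    sketch_threads[gtid] = t;
    return t;
}
#endif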
4344/*
4345 * reinitialize team for reuse.
4346 *
4347 * The hot team code calls this case at every fork barrier, so EPCC barrier
 4348 * tests are extremely sensitive to changes in it, esp. writes to the team
4349 * struct, which cause a cache invalidation in all threads.
4350 *
4351 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4352 */
4353static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004354__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004355 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4356 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004357 KMP_DEBUG_ASSERT( team && new_icvs);
4358 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004359 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004360
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004361 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004362
Jim Cownie181b4bb2013-12-23 17:28:57 +00004363 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004364 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004365 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004366
4367 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4368 team->t.t_threads[0], team ) );
4369}
4370
Jim Cownie5e8470a2013-09-27 10:38:44 +00004371
4372/* initialize the team data structure
4373 * this assumes the t_threads and t_max_nproc are already set
4374 * also, we don't touch the arguments */
4375static void
4376__kmp_initialize_team(
4377 kmp_team_t * team,
4378 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004379 kmp_internal_control_t * new_icvs,
4380 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004381) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004382 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4383
Jim Cownie5e8470a2013-09-27 10:38:44 +00004384 /* verify */
4385 KMP_DEBUG_ASSERT( team );
4386 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4387 KMP_DEBUG_ASSERT( team->t.t_threads );
4388 KMP_MB();
4389
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004390 team->t.t_master_tid = 0; /* not needed */
4391 /* team->t.t_master_bar; not needed */
4392 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4393 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004394
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004395 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4396 team->t.t_next_pool = NULL;
4397 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004398
4399 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004400 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004401
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004402 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4403 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004404
4405#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004406 team->t.t_fp_control_saved = FALSE; /* not needed */
4407 team->t.t_x87_fpu_control_word = 0; /* not needed */
4408 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004409#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4410
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004411 team->t.t_construct = 0;
4412 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004413
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004414 team->t.t_ordered .dt.t_value = 0;
4415 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004416
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004417 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004418
4419#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004420 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004421#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004422 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004423
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004424 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004425
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004426 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004427
4428 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004429 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004430}
4431
Alp Toker98758b02014-03-02 04:12:06 +00004432#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004433/* Sets full mask for thread and returns old mask, no changes to structures. */
4434static void
4435__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4436{
4437 if ( KMP_AFFINITY_CAPABLE() ) {
4438 int status;
4439 if ( old_mask != NULL ) {
4440 status = __kmp_get_system_affinity( old_mask, TRUE );
4441 int error = errno;
4442 if ( status != 0 ) {
4443 __kmp_msg(
4444 kmp_ms_fatal,
4445 KMP_MSG( ChangeThreadAffMaskError ),
4446 KMP_ERR( error ),
4447 __kmp_msg_null
4448 );
4449 }
4450 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004451 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004452 }
4453}
4454#endif
4455
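// __kmp_set_thread_affinity_mask_full_tmp saves the caller's affinity mask and widens it
// to the full mask so that workers forked right afterwards inherit a wide mask instead of
// the master's single place; the caller restores old_mask later. Below is a rough
// Linux-only sketch of the same save/widen pattern using sched_getaffinity() and
// sched_setaffinity() on the calling thread (pid 0); it assumes ncpus <= CPU_SETSIZE and
// is not the runtime's wrapper.
#if 0
#define _GNU_SOURCE
#include <sched.h>

static int sketch_set_full_affinity_tmp( cpu_set_t *old_mask, int ncpus )
{
    if ( sched_getaffinity( 0, sizeof( *old_mask ), old_mask ) != 0 )
        return -1;                    // keep the current binding on failure
    cpu_set_t full;
    CPU_ZERO( &full );
    for ( int i = 0; i < ncpus; ++i )
        CPU_SET( i, &full );          // widen to every configured CPU
    return sched_setaffinity( 0, sizeof( full ), &full );
    // later: sched_setaffinity( 0, sizeof( *old_mask ), old_mask ) restores the old mask
}
#endif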
Alp Toker98758b02014-03-02 04:12:06 +00004456#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004457
4458//
4459// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
 4460// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004461// thread's partition, and binds each worker to a thread in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004462// The master thread's partition should already include its current binding.
4463//
4464static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004465__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004466{
4467 //
 4468    // Copy the master thread's place partition to the team struct
4469 //
4470 kmp_info_t *master_th = team->t.t_threads[0];
4471 KMP_DEBUG_ASSERT( master_th != NULL );
4472 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4473 int first_place = master_th->th.th_first_place;
4474 int last_place = master_th->th.th_last_place;
4475 int masters_place = master_th->th.th_current_place;
4476 team->t.t_first_place = first_place;
4477 team->t.t_last_place = last_place;
4478
4479 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4480 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4481 masters_place, first_place, last_place ) );
4482
4483 switch ( proc_bind ) {
4484
4485 case proc_bind_default:
4486 //
4487 // serial teams might have the proc_bind policy set to
4488 // proc_bind_default. It doesn't matter, as we don't
4489 // rebind the master thread for any proc_bind policy.
4490 //
4491 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4492 break;
4493
4494 case proc_bind_master:
4495 {
4496 int f;
4497 int n_th = team->t.t_nproc;
4498 for ( f = 1; f < n_th; f++ ) {
4499 kmp_info_t *th = team->t.t_threads[f];
4500 KMP_DEBUG_ASSERT( th != NULL );
4501 th->th.th_first_place = first_place;
4502 th->th.th_last_place = last_place;
4503 th->th.th_new_place = masters_place;
4504
4505 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4506 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4507 team->t.t_id, f, masters_place, first_place, last_place ) );
4508 }
4509 }
4510 break;
4511
4512 case proc_bind_close:
4513 {
4514 int f;
4515 int n_th = team->t.t_nproc;
4516 int n_places;
4517 if ( first_place <= last_place ) {
4518 n_places = last_place - first_place + 1;
4519 }
4520 else {
4521 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4522 }
4523 if ( n_th <= n_places ) {
4524 int place = masters_place;
4525 for ( f = 1; f < n_th; f++ ) {
4526 kmp_info_t *th = team->t.t_threads[f];
4527 KMP_DEBUG_ASSERT( th != NULL );
4528
4529 if ( place == last_place ) {
4530 place = first_place;
4531 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004532 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004533 place = 0;
4534 }
4535 else {
4536 place++;
4537 }
4538 th->th.th_first_place = first_place;
4539 th->th.th_last_place = last_place;
4540 th->th.th_new_place = place;
4541
4542 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4543 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4544 team->t.t_id, f, place, first_place, last_place ) );
4545 }
4546 }
4547 else {
4548 int S, rem, gap, s_count;
4549 S = n_th / n_places;
4550 s_count = 0;
4551 rem = n_th - ( S * n_places );
4552 gap = rem > 0 ? n_places/rem : n_places;
4553 int place = masters_place;
4554 int gap_ct = gap;
4555 for ( f = 0; f < n_th; f++ ) {
4556 kmp_info_t *th = team->t.t_threads[f];
4557 KMP_DEBUG_ASSERT( th != NULL );
4558
4559 th->th.th_first_place = first_place;
4560 th->th.th_last_place = last_place;
4561 th->th.th_new_place = place;
4562 s_count++;
4563
4564 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4565 // do nothing, add an extra thread to place on next iteration
4566 }
4567 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4568 // we added an extra thread to this place; move to next place
4569 if ( place == last_place ) {
4570 place = first_place;
4571 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004572 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004573 place = 0;
4574 }
4575 else {
4576 place++;
4577 }
4578 s_count = 0;
4579 gap_ct = 1;
4580 rem--;
4581 }
4582 else if (s_count == S) { // place full; don't add extra
4583 if ( place == last_place ) {
4584 place = first_place;
4585 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004586 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004587 place = 0;
4588 }
4589 else {
4590 place++;
4591 }
4592 gap_ct++;
4593 s_count = 0;
4594 }
4595
4596 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4597 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4598 team->t.t_id, f, th->th.th_new_place, first_place,
4599 last_place ) );
4600 }
4601 KMP_DEBUG_ASSERT( place == masters_place );
4602 }
4603 }
4604 break;
4605
4606 case proc_bind_spread:
4607 {
4608 int f;
4609 int n_th = team->t.t_nproc;
4610 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004611 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004612 if ( first_place <= last_place ) {
4613 n_places = last_place - first_place + 1;
4614 }
4615 else {
4616 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4617 }
4618 if ( n_th <= n_places ) {
4619 int place = masters_place;
4620 int S = n_places/n_th;
4621 int s_count, rem, gap, gap_ct;
4622 rem = n_places - n_th*S;
4623 gap = rem ? n_th/rem : 1;
4624 gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004625 thidx = n_th;
4626 if (update_master_only == 1)
4627 thidx = 1;
4628 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004629 kmp_info_t *th = team->t.t_threads[f];
4630 KMP_DEBUG_ASSERT( th != NULL );
4631
4632 th->th.th_first_place = place;
4633 th->th.th_new_place = place;
4634 s_count = 1;
4635 while (s_count < S) {
4636 if ( place == last_place ) {
4637 place = first_place;
4638 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004639 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004640 place = 0;
4641 }
4642 else {
4643 place++;
4644 }
4645 s_count++;
4646 }
4647 if (rem && (gap_ct == gap)) {
4648 if ( place == last_place ) {
4649 place = first_place;
4650 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004651 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004652 place = 0;
4653 }
4654 else {
4655 place++;
4656 }
4657 rem--;
4658 gap_ct = 0;
4659 }
4660 th->th.th_last_place = place;
4661 gap_ct++;
4662
4663 if ( place == last_place ) {
4664 place = first_place;
4665 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004666 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004667 place = 0;
4668 }
4669 else {
4670 place++;
4671 }
4672
4673 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4674 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4675 team->t.t_id, f, th->th.th_new_place,
4676 th->th.th_first_place, th->th.th_last_place ) );
4677 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004678 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004679 }
4680 else {
4681 int S, rem, gap, s_count;
4682 S = n_th / n_places;
4683 s_count = 0;
4684 rem = n_th - ( S * n_places );
4685 gap = rem > 0 ? n_places/rem : n_places;
4686 int place = masters_place;
4687 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004688 thidx = n_th;
4689 if (update_master_only == 1)
4690 thidx = 1;
4691 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004692 kmp_info_t *th = team->t.t_threads[f];
4693 KMP_DEBUG_ASSERT( th != NULL );
4694
4695 th->th.th_first_place = place;
4696 th->th.th_last_place = place;
4697 th->th.th_new_place = place;
4698 s_count++;
4699
4700 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4701 // do nothing, add an extra thread to place on next iteration
4702 }
4703 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4704 // we added an extra thread to this place; move on to next place
4705 if ( place == last_place ) {
4706 place = first_place;
4707 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004708 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004709 place = 0;
4710 }
4711 else {
4712 place++;
4713 }
4714 s_count = 0;
4715 gap_ct = 1;
4716 rem--;
4717 }
4718 else if (s_count == S) { // place is full; don't add extra thread
4719 if ( place == last_place ) {
4720 place = first_place;
4721 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004722 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004723 place = 0;
4724 }
4725 else {
4726 place++;
4727 }
4728 gap_ct++;
4729 s_count = 0;
4730 }
4731
4732 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4733 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4734 team->t.t_id, f, th->th.th_new_place,
4735 th->th.th_first_place, th->th.th_last_place) );
4736 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004737 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004738 }
4739 }
4740 break;
4741
4742 default:
4743 break;
4744 }
4745
4746 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4747}
4748
Alp Toker98758b02014-03-02 04:12:06 +00004749#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
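// For proc_bind_close with no oversubscription (n_th <= n_places), the loop above keeps
// the master on its own place and hands each subsequent worker the next place in the
// partition, wrapping from last_place back to first_place and from the last mask index
// back to 0. The standalone sketch below reproduces only that assignment; 'num_masks'
// and the output array are stand-ins for __kmp_affinity_num_masks and the team's
// th_new_place fields.
#if 0
static void sketch_close_places( int n_th, int masters_place,
                                 int first_place, int last_place,
                                 int num_masks, int *new_place /* [n_th] */ )
{
    int place = masters_place;
    new_place[0] = masters_place;          // the master is never rebound
    for ( int f = 1; f < n_th; ++f ) {
        if ( place == last_place )
            place = first_place;           // wrap inside the partition
        else if ( place == num_masks - 1 )
            place = 0;                     // wrap around the global mask array
        else
            ++place;
        new_place[f] = place;
    }
}
#endif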
Jim Cownie5e8470a2013-09-27 10:38:44 +00004750
4751/* allocate a new team data structure to use. take one off of the free pool if available */
4752kmp_team_t *
4753__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004754#if OMPT_SUPPORT
4755 ompt_parallel_id_t ompt_parallel_id,
4756#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004757#if OMP_40_ENABLED
4758 kmp_proc_bind_t new_proc_bind,
4759#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004760 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004761 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004762{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00004763 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004764 int f;
4765 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004766 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004767 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004768
4769 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4770 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4771 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4772 KMP_MB();
4773
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004774#if KMP_NESTED_HOT_TEAMS
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004775 kmp_hot_team_ptr_t *hot_teams;
4776 if( master ) {
4777 team = master->th.th_team;
4778 level = team->t.t_active_level;
4779 if( master->th.th_teams_microtask ) { // in teams construct?
4780 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4781 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4782 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
4783 ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise
4784 }
4785 }
4786 hot_teams = master->th.th_hot_teams;
4787 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4788 { // hot team has already been allocated for given level
4789 use_hot_team = 1;
4790 } else {
4791 use_hot_team = 0;
4792 }
4793 }
4794#endif
4795 // Optimization to use a "hot" team
4796 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004797 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004798#if KMP_NESTED_HOT_TEAMS
4799 team = hot_teams[level].hot_team;
4800#else
4801 team = root->r.r_hot_team;
4802#endif
4803#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004804 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004805 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4806 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004807 }
4808#endif
4809
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004810 // Has the number of threads changed?
4811 /* Let's assume the most common case is that the number of threads is unchanged, and
4812 put that case first. */
4813 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4814 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004815 // This case can mean that omp_set_num_threads() was called and the hot team size
4816 // was already reduced, so we check the special flag
4817 if ( team->t.t_size_changed == -1 ) {
4818 team->t.t_size_changed = 1;
4819 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004820 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004821 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004822
4823 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004824 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004825 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4826 team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004827 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004828
4829 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4830
4831 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4832 0, team->t.t_threads[0], team ) );
4833 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4834
4835#if OMP_40_ENABLED
4836# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004837 if ( ( team->t.t_size_changed == 0 )
4838 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004839 if (new_proc_bind == proc_bind_spread) {
4840 __kmp_partition_places(team, 1); // add flag to update only master for spread
4841 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004842 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4843 team->t.t_id, new_proc_bind, team->t.t_first_place,
4844 team->t.t_last_place ) );
4845 }
4846 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004847 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004848 __kmp_partition_places( team );
4849 }
4850# else
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004851 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004852# endif /* KMP_AFFINITY_SUPPORTED */
4853#endif /* OMP_40_ENABLED */
4854 }
4855 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004856 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4857
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004858 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004859#if KMP_NESTED_HOT_TEAMS
4860 if( __kmp_hot_teams_mode == 0 ) {
4861 // AC: saved number of threads should correspond to team's value in this mode,
4862 // can be bigger in mode 1, when hot team has some threads in reserve
4863 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4864 hot_teams[level].hot_team_nth = new_nproc;
4865#endif // KMP_NESTED_HOT_TEAMS
4866 /* release the extra threads we don't need any more */
4867 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4868 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004869 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4870 // When decreasing team size, threads no longer in the team should unref task team.
4871 team->t.t_threads[f]->th.th_task_team = NULL;
4872 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004873 __kmp_free_thread( team->t.t_threads[ f ] );
4874 team->t.t_threads[ f ] = NULL;
4875 }
4876#if KMP_NESTED_HOT_TEAMS
4877 } // (__kmp_hot_teams_mode == 0)
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004878 else {
4879 // When keeping extra threads in team, switch threads to wait on own b_go flag
4880 for (f=new_nproc; f<team->t.t_nproc; ++f) {
4881 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4882 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4883 for (int b=0; b<bs_last_barrier; ++b) {
4884 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4885 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4886 }
4887 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4888 }
4889 }
4890 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004891#endif // KMP_NESTED_HOT_TEAMS
4892 team->t.t_nproc = new_nproc;
4893 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004894 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4895 team->t.t_sched.chunk != new_icvs->sched.chunk)
4896 team->t.t_sched = new_icvs->sched;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004897 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004898
Jim Cownie5e8470a2013-09-27 10:38:44 +00004899 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004900 for(f = 0; f < new_nproc; ++f) {
4901 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004902 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004903 // restore the current task state of the master thread: should be the implicit task
4904 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4905 0, team->t.t_threads[0], team ) );
4906
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004907 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004908
4909#ifdef KMP_DEBUG
4910 for ( f = 0; f < team->t.t_nproc; f++ ) {
4911 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4912 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4913 }
4914#endif
4915
4916#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004917 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00004918# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004919 __kmp_partition_places( team );
4920# endif
4921#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004922 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004923 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004924#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004925 kmp_affin_mask_t *old_mask;
4926 if ( KMP_AFFINITY_CAPABLE() ) {
4927 KMP_CPU_ALLOC(old_mask);
4928 }
4929#endif
4930
4931 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4932
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004933 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004934
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004935#if KMP_NESTED_HOT_TEAMS
4936 int avail_threads = hot_teams[level].hot_team_nth;
4937 if( new_nproc < avail_threads )
4938 avail_threads = new_nproc;
4939 kmp_info_t **other_threads = team->t.t_threads;
4940 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4941 // Adjust barrier data of reserved threads (if any) of the team
4942 // Other data will be set in __kmp_initialize_info() below.
4943 int b;
4944 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4945 for ( b = 0; b < bs_last_barrier; ++ b ) {
4946 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4947 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004948#if USE_DEBUGGER
4949 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4950#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004951 }
4952 }
4953 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4954 // we have all needed threads in reserve, no need to allocate any
 4955                // this is only possible in mode 1; cannot have reserved threads in mode 0
4956 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4957 team->t.t_nproc = new_nproc; // just get reserved threads involved
4958 } else {
4959 // we may have some threads in reserve, but not enough
4960 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4961 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4962#endif // KMP_NESTED_HOT_TEAMS
4963 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004964 /* reallocate larger arrays */
4965 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004966 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004967 }
4968
Alp Toker98758b02014-03-02 04:12:06 +00004969#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004970 /* Temporarily set full mask for master thread before
4971 creation of workers. The reason is that workers inherit
4972 the affinity from master, so if a lot of workers are
4973 created on the single core quickly, they don't get
4974 a chance to set their own affinity for a long time.
4975 */
4976 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4977#endif
4978
4979 /* allocate new threads for the hot team */
4980 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4981 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4982 KMP_DEBUG_ASSERT( new_worker );
4983 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004984
Jonathan Peytond26e2132015-09-10 18:44:30 +00004985 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004986 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4987 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4988 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4989
4990 { // Initialize barrier data for new threads.
4991 int b;
4992 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004993 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004994 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004995 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004996#if USE_DEBUGGER
4997 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4998#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004999 }
5000 }
5001 }
5002
Alp Toker98758b02014-03-02 04:12:06 +00005003#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005004 if ( KMP_AFFINITY_CAPABLE() ) {
5005 /* Restore initial master thread's affinity mask */
5006 __kmp_set_system_affinity( old_mask, TRUE );
5007 KMP_CPU_FREE(old_mask);
5008 }
5009#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005010#if KMP_NESTED_HOT_TEAMS
5011 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5012#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005013        /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00005014 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005015 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005016
Jonathan Peytone03b62f2015-10-08 18:49:40 +00005017 /* reinitialize the threads */
5018 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00005019 for (f=0; f < team->t.t_nproc; ++f)
5020 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5021 if (level) { // set th_task_state for new threads in nested hot team
5022 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00005023 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00005024 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
5025 for (f=old_nproc; f < team->t.t_nproc; ++f)
5026 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005027 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005028 else { // set th_task_state for new threads in non-nested hot team
5029 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
5030 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005031 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005032 }
5033
Jim Cownie5e8470a2013-09-27 10:38:44 +00005034#ifdef KMP_DEBUG
5035 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5036 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5037 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5038 }
5039#endif
5040
5041#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00005042 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00005043# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005044 __kmp_partition_places( team );
5045# endif
5046#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005047 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005048
5049#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005050 kmp_info_t *master = team->t.t_threads[0];
5051 if( master->th.th_teams_microtask ) {
5052 for( f = 1; f < new_nproc; ++f ) {
5053 // propagate teams construct specific info to workers
5054 kmp_info_t *thr = team->t.t_threads[f];
5055 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5056 thr->th.th_teams_level = master->th.th_teams_level;
5057 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005058 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005059 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005060#endif /* OMP_40_ENABLED */
5061#if KMP_NESTED_HOT_TEAMS
5062 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005063 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005064 for( f = 1; f < new_nproc; ++f ) {
5065 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005066 int b;
5067 kmp_balign_t * balign = thr->th.th_bar;
5068 for( b = 0; b < bs_last_barrier; ++ b ) {
5069 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5070 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005071#if USE_DEBUGGER
5072 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5073#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005074 }
5075 }
5076 }
5077#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005078
5079 /* reallocate space for arguments if necessary */
5080 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005081 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005082 //
5083 // The hot team re-uses the previous task team,
5084 // if untouched during the previous release->gather phase.
5085 //
5086
5087 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5088
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005089#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005090 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005091 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5092 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005093 }
5094#endif
5095
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005096#if OMPT_SUPPORT
5097 __ompt_team_assign_id(team, ompt_parallel_id);
5098#endif
5099
Jim Cownie5e8470a2013-09-27 10:38:44 +00005100 KMP_MB();
5101
5102 return team;
5103 }
5104
5105 /* next, let's try to take one from the team pool */
5106 KMP_MB();
5107 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5108 {
5109 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5110 if ( team->t.t_max_nproc >= max_nproc ) {
5111 /* take this team from the team pool */
5112 __kmp_team_pool = team->t.t_next_pool;
5113
5114 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005115 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005116
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005117 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5118 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5119 team->t.t_task_team[0] = NULL;
5120 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005121
5122 /* reallocate space for arguments if necessary */
5123 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005124 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005125
5126 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5127 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5128 { // Initialize barrier data.
5129 int b;
5130 for ( b = 0; b < bs_last_barrier; ++ b) {
5131 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005132#if USE_DEBUGGER
5133 team->t.t_bar[ b ].b_master_arrived = 0;
5134 team->t.t_bar[ b ].b_team_arrived = 0;
5135#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005136 }
5137 }
5138
5139#if OMP_40_ENABLED
5140 team->t.t_proc_bind = new_proc_bind;
5141#endif
5142
5143 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005144
5145#if OMPT_SUPPORT
5146 __ompt_team_assign_id(team, ompt_parallel_id);
5147#endif
5148
Jim Cownie5e8470a2013-09-27 10:38:44 +00005149 KMP_MB();
5150
5151 return team;
5152 }
5153
5154 /* reap team if it is too small, then loop back and check the next one */
5155        /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
5156 /* TODO: Use technique to find the right size hot-team, don't reap them */
5157 team = __kmp_reap_team( team );
5158 __kmp_team_pool = team;
5159 }
5160
5161 /* nothing available in the pool, no matter, make a new team! */
5162 KMP_MB();
5163 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5164
5165 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005166 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005167 /* NOTE well, for some reason allocating one big buffer and dividing it
5168 * up seems to really hurt performance a lot on the P4, so, let's not use
5169 * this... */
5170 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005171
5172 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005173 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005174
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005175 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5176 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5177 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5178 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005179
5180 if ( __kmp_storage_map ) {
5181 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5182 }
5183
5184 /* allocate space for arguments */
5185 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005186 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005187
5188 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5189 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5190 { // Initialize barrier data.
5191 int b;
5192 for ( b = 0; b < bs_last_barrier; ++ b ) {
5193 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005194#if USE_DEBUGGER
5195 team->t.t_bar[ b ].b_master_arrived = 0;
5196 team->t.t_bar[ b ].b_team_arrived = 0;
5197#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005198 }
5199 }
5200
5201#if OMP_40_ENABLED
5202 team->t.t_proc_bind = new_proc_bind;
5203#endif
5204
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005205#if OMPT_SUPPORT
5206 __ompt_team_assign_id(team, ompt_parallel_id);
5207 team->t.ompt_serialized_team_info = NULL;
5208#endif
5209
Jim Cownie5e8470a2013-09-27 10:38:44 +00005210 KMP_MB();
5211
5212 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5213
5214 return team;
5215}
5216
5217/* TODO implement hot-teams at all levels */
5218/* TODO implement lazy thread release on demand (disband request) */
5219
5220/* free the team. return it to the team pool. release all the threads
5221 * associated with it */
5222void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005223__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005224{
5225 int f;
5226 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5227
5228 /* verify state */
5229 KMP_DEBUG_ASSERT( root );
5230 KMP_DEBUG_ASSERT( team );
5231 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5232 KMP_DEBUG_ASSERT( team->t.t_threads );
5233
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005234 int use_hot_team = team == root->r.r_hot_team;
5235#if KMP_NESTED_HOT_TEAMS
5236 int level;
5237 kmp_hot_team_ptr_t *hot_teams;
5238 if( master ) {
5239 level = team->t.t_active_level - 1;
5240 if( master->th.th_teams_microtask ) { // in teams construct?
5241 if( master->th.th_teams_size.nteams > 1 ) {
5242 ++level; // level was not increased in teams construct for team_of_masters
5243 }
5244 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5245 master->th.th_teams_level == team->t.t_level ) {
5246 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5247 } // team->t.t_level will be increased inside parallel
5248 }
5249 hot_teams = master->th.th_hot_teams;
5250 if( level < __kmp_hot_teams_max_level ) {
5251 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5252 use_hot_team = 1;
5253 }
5254 }
5255#endif // KMP_NESTED_HOT_TEAMS
5256
Jim Cownie5e8470a2013-09-27 10:38:44 +00005257 /* team is done working */
5258 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005259 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005260 // Do not reset pointer to parent team to NULL for hot teams.
5261
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005262 /* if we are non-hot team, release our threads */
5263 if( ! use_hot_team ) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005264 if (__kmp_tasking_mode != tskm_immediate_exec) {
5265 // Wait for threads to reach reapable state
5266 for (f = 1; f < team->t.t_nproc; ++f) {
5267 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5268 volatile kmp_uint32 *state = &team->t.t_threads[f]->th.th_reap_state;
5269 while (*state != KMP_SAFE_TO_REAP) {
5270#if KMP_OS_WINDOWS
5271 // On Windows a thread can be killed at any time, check this
5272 DWORD ecode;
5273 if (__kmp_is_thread_alive(team->t.t_threads[f], &ecode))
5274 KMP_CPU_PAUSE();
5275 else
5276 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5277#else
5278 KMP_CPU_PAUSE();
5279#endif
5280 }
5281 }
5282
Jonathan Peyton54127982015-11-04 21:37:48 +00005283 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005284 int tt_idx;
5285 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005286 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5287 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005288 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5289 team->t.t_threads[f]->th.th_task_team = NULL;
5290 }
5291 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005292#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005293 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005294#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005295 team->t.t_task_team[tt_idx] = NULL;
5296 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005297 }
5298 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005299
5300 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005301 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005302 team->t.t_level = 0;
5303 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005304
Jim Cownie5e8470a2013-09-27 10:38:44 +00005305 /* free the worker threads */
5306 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5307 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5308 __kmp_free_thread( team->t.t_threads[ f ] );
5309 team->t.t_threads[ f ] = NULL;
5310 }
5311
Jim Cownie5e8470a2013-09-27 10:38:44 +00005312 /* put the team back in the team pool */
5313 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005314 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005315 __kmp_team_pool = (volatile kmp_team_t*) team;
5316 }
5317
5318 KMP_MB();
5319}
5320
5321
5322/* reap the team. destroy it, reclaim all its resources and free its memory */
5323kmp_team_t *
5324__kmp_reap_team( kmp_team_t *team )
5325{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005326 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005327
5328 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005329 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5330 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5331 KMP_DEBUG_ASSERT( team->t.t_threads );
5332 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005333
5334 /* TODO clean the threads that are a part of this? */
5335
5336 /* free stuff */
5337
5338 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005339 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5340 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005341 __kmp_free( team );
5342
5343 KMP_MB();
5344 return next_pool;
5345}
5346
5347//
5348// Free the thread. Don't reap it, just place it on the pool of available
5349// threads.
5350//
5351// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5352// binding for the affinity mechanism to be useful.
5353//
5354// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5355// However, we want to avoid a potential performance problem by always
5356// scanning through the list to find the correct point at which to insert
5357// the thread (potential N**2 behavior). To do this we keep track of the
5358// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5359// With single-level parallelism, threads will always be added to the tail
5360// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5361// parallelism, all bets are off and we may need to scan through the entire
5362// free list.
5363//
5364// This change also has a potentially large performance benefit, for some
5365// applications. Previously, as threads were freed from the hot team, they
5366// would be placed back on the free list in inverse order. If the hot team
5367// grew back to its original size, then the freed thread would be placed
5368// back on the hot team in reverse order. This could cause bad cache
5369// locality problems on programs where the size of the hot team regularly
5370// grew and shrunk.
5371//
5372// Now, for single-level parallelism, the OMP tid is always == gtid.
5373//
5374void
5375__kmp_free_thread( kmp_info_t *this_th )
5376{
5377 int gtid;
5378 kmp_info_t **scan;
5379
5380 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5381 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5382
5383 KMP_DEBUG_ASSERT( this_th );
5384
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005385 // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team).
5386 int b;
5387 kmp_balign_t *balign = this_th->th.th_bar;
5388 for (b=0; b<bs_last_barrier; ++b) {
5389 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5390 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5391 balign[b].bb.team = NULL;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005392 balign[b].bb.leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005393 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005394 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005395
Jim Cownie5e8470a2013-09-27 10:38:44 +00005396 /* put thread back on the free pool */
5397 TCW_PTR(this_th->th.th_team, NULL);
5398 TCW_PTR(this_th->th.th_root, NULL);
5399 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5400
5401 //
5402 // If the __kmp_thread_pool_insert_pt is already past the new insert
5403 // point, then we need to re-scan the entire list.
5404 //
5405 gtid = this_th->th.th_info.ds.ds_gtid;
5406 if ( __kmp_thread_pool_insert_pt != NULL ) {
5407 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5408 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5409 __kmp_thread_pool_insert_pt = NULL;
5410 }
5411 }
5412
5413 //
5414 // Scan down the list to find the place to insert the thread.
5415 // scan is the address of a link in the list, possibly the address of
5416 // __kmp_thread_pool itself.
5417 //
5418    // In the absence of nested parallelism, the for loop will have 0 iterations.
5419 //
5420 if ( __kmp_thread_pool_insert_pt != NULL ) {
5421 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5422 }
5423 else {
5424 scan = (kmp_info_t **)&__kmp_thread_pool;
5425 }
5426 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5427 scan = &( (*scan)->th.th_next_pool ) );
5428
5429 //
5430 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5431 // to its address.
5432 //
5433 TCW_PTR(this_th->th.th_next_pool, *scan);
5434 __kmp_thread_pool_insert_pt = *scan = this_th;
5435 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5436 || ( this_th->th.th_info.ds.ds_gtid
5437 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5438 TCW_4(this_th->th.th_in_pool, TRUE);
5439 __kmp_thread_pool_nth++;
5440
5441 TCW_4(__kmp_nth, __kmp_nth - 1);
5442
5443#ifdef KMP_ADJUST_BLOCKTIME
5444 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005445 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005446 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5447 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5448 if ( __kmp_nth <= __kmp_avail_proc ) {
5449 __kmp_zero_bt = FALSE;
5450 }
5451 }
5452#endif /* KMP_ADJUST_BLOCKTIME */
5453
5454 KMP_MB();
5455}
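/*
 * Illustrative sketch (not part of the runtime): the gtid-sorted free-list
 * insertion with a cached insertion hint, as described in the comment block
 * above __kmp_free_thread(), reduced to a standalone example. The names
 * example_node_t, example_pool, example_insert_pt and example_pool_insert()
 * are hypothetical stand-ins for kmp_info_t, __kmp_thread_pool,
 * __kmp_thread_pool_insert_pt and the insertion code above; the volatile /
 * TCW_PTR() handling of the real code is intentionally omitted.
 */
typedef struct example_node {
    int gtid;                      /* sort key, mirrors th_info.ds.ds_gtid */
    struct example_node *next;     /* mirrors th.th_next_pool */
} example_node_t;

static example_node_t *example_pool = NULL;       /* sorted free list */
static example_node_t *example_insert_pt = NULL;  /* last insertion point */

static void example_pool_insert(example_node_t *node) {
    example_node_t **scan;
    /* If the cached hint is already past the new node, a full re-scan from
       the head of the list is required. */
    if (example_insert_pt != NULL && example_insert_pt->gtid > node->gtid)
        example_insert_pt = NULL;
    scan = (example_insert_pt != NULL) ? &example_insert_pt->next
                                       : &example_pool;
    /* Advance to the first element with a larger gtid; with single-level
       parallelism this loop performs zero iterations. */
    while (*scan != NULL && (*scan)->gtid < node->gtid)
        scan = &(*scan)->next;
    node->next = *scan;
    *scan = node;
    example_insert_pt = node;      /* remember the spot for the next insert */
}
/* E.g. inserting gtids 3, 1, 2 in that order yields the list 1 -> 2 -> 3,
   and only the insert of gtid 1 forces a re-scan from the head. */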
5456
Jim Cownie5e8470a2013-09-27 10:38:44 +00005457
Jim Cownie5e8470a2013-09-27 10:38:44 +00005458/* ------------------------------------------------------------------------ */
5459
5460void *
5461__kmp_launch_thread( kmp_info_t *this_thr )
5462{
5463 int gtid = this_thr->th.th_info.ds.ds_gtid;
5464/* void *stack_data;*/
5465 kmp_team_t *(*volatile pteam);
5466
5467 KMP_MB();
5468 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5469
5470 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005471 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005472 }
5473
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005474#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005475 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005476 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5477 this_thr->th.ompt_thread_info.wait_id = 0;
5478 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005479 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005480 __ompt_thread_begin(ompt_thread_worker, gtid);
5481 }
5482 }
5483#endif
5484
Jim Cownie5e8470a2013-09-27 10:38:44 +00005485 /* This is the place where threads wait for work */
5486 while( ! TCR_4(__kmp_global.g.g_done) ) {
5487 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5488 KMP_MB();
5489
5490 /* wait for work to do */
5491 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5492
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005493#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005494 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005495 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5496 }
5497#endif
5498
Jim Cownie5e8470a2013-09-27 10:38:44 +00005499 /* No tid yet since not part of a team */
5500 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5501
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005502#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005503 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005504 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5505 }
5506#endif
5507
Jim Cownie5e8470a2013-09-27 10:38:44 +00005508 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5509
5510 /* have we been allocated? */
5511 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005512#if OMPT_SUPPORT
5513 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005514 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005515 if (ompt_enabled) {
5516 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005517 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005518 }
5519#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005520 /* we were just woken up, so run our new task */
5521 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5522 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005523 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5524 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005525
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005526 updateHWFPControl (*pteam);
5527
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005528#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005529 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005530 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005531 // Initialize OMPT task id for implicit task.
5532 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005533 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005534 }
5535#endif
5536
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005537 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005538 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5539 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005540 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005541 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005542 KMP_ASSERT( rc );
5543
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005544#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005545 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005546 /* no frame set while outside task */
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005547 task_info->frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005548
5549 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5550 }
5551#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005552 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005553 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5554 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005555 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005556 /* join barrier after parallel region */
5557 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005558#if OMPT_SUPPORT && OMPT_TRACE
5559 if (ompt_enabled) {
5560 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005561 // don't access *pteam here: it may have already been freed
5562 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005563 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5564 my_parallel_id, task_info->task_id);
5565 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005566 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005567 task_info->task_id = 0;
5568 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005569#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005570 }
5571 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005572 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005573
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005574#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005575 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005576 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5577 __ompt_thread_end(ompt_thread_worker, gtid);
5578 }
5579#endif
5580
Jonathan Peyton54127982015-11-04 21:37:48 +00005581 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005582 /* run the destructors for the threadprivate data for this thread */
5583 __kmp_common_destroy_gtid( gtid );
5584
5585 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5586 KMP_MB();
5587 return this_thr;
5588}
5589
5590/* ------------------------------------------------------------------------ */
5591/* ------------------------------------------------------------------------ */
5592
Jim Cownie5e8470a2013-09-27 10:38:44 +00005593void
5594__kmp_internal_end_dest( void *specific_gtid )
5595{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005596 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005597 #pragma warning( push )
5598 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5599 #endif
5600 // Make sure no significant bits are lost
5601 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005602 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005603 #pragma warning( pop )
5604 #endif
5605
5606 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5607    /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
5608 * this is because 0 is reserved for the nothing-stored case */
5609
5610 /* josh: One reason for setting the gtid specific data even when it is being
5611 destroyed by pthread is to allow gtid lookup through thread specific data
5612 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5613 that gets executed in the call to __kmp_internal_end_thread, actually
5614 gets the gtid through the thread specific data. Setting it here seems
5615 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5616 to run smoothly.
5617 todo: get rid of this after we remove the dependence on
5618 __kmp_gtid_get_specific
5619 */
5620 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5621 __kmp_gtid_set_specific( gtid );
5622 #ifdef KMP_TDATA_GTID
5623 __kmp_gtid = gtid;
5624 #endif
5625 __kmp_internal_end_thread( gtid );
5626}
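/*
 * Illustrative sketch (not part of the runtime): the gtid+1 encoding used for
 * the thread-specific value, written out as two hypothetical helpers. The
 * runtime does this inline (see the cast above and __kmp_gtid_set_specific());
 * a stored value of 0/NULL means "no gtid recorded for this thread", which is
 * why a valid gtid of 0 cannot be stored directly.
 */
static void *example_encode_gtid(int gtid) {
    /* store gtid+1 so that gtid 0 is distinguishable from "nothing stored" */
    return (void *)(kmp_intptr_t)(gtid + 1);
}

static int example_decode_gtid(void *specific) {
    /* NULL/0 decodes to -1, i.e. no gtid was registered for this thread */
    return (int)((kmp_intptr_t)specific - 1);
}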
5627
Jonathan Peyton99016992015-05-26 17:32:53 +00005628#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005629
5630// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005631// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005632// option in makefile.mk works fine.
5633
5634__attribute__(( destructor ))
5635void
5636__kmp_internal_end_dtor( void )
5637{
5638 __kmp_internal_end_atexit();
5639}
5640
5641void
5642__kmp_internal_end_fini( void )
5643{
5644 __kmp_internal_end_atexit();
5645}
5646
5647#endif
5648
5649/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5650void
5651__kmp_internal_end_atexit( void )
5652{
5653 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5654 /* [Windows]
5655 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5656 stat code that depends on thread specific data for gtid fails because that data becomes
5657 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5658 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5659 stat code and use __kmp_internal_end_library to cleanly shutdown the library.
5660
5661// TODO: Can some of this comment about GVS be removed?
5662 I suspect that the offending stat code is executed when the calling thread tries to
5663 clean up a dead root thread's data structures, resulting in GVS code trying to close
5664 the GVS structures for that thread, but since the stat code uses
5665 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5666 cleaning up itself instead of another thread, it gets confused. This happens because
5667 allowing a thread to unregister and cleanup another thread is a recent modification for
5668 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5669 thread may end up trying to unregister another thread only if thread death does not
5670 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5671 specific data destructor function to detect thread death. For Windows dynamic, there
5672 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5673 workaround is applicable only for Windows static stat library.
5674 */
5675 __kmp_internal_end_library( -1 );
5676 #if KMP_OS_WINDOWS
5677 __kmp_close_console();
5678 #endif
5679}
5680
5681static void
5682__kmp_reap_thread(
5683 kmp_info_t * thread,
5684 int is_root
5685) {
5686
Alp Toker8f2d3f02014-02-24 10:40:15 +00005687 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005688
5689 int gtid;
5690
5691 KMP_DEBUG_ASSERT( thread != NULL );
5692
5693 gtid = thread->th.th_info.ds.ds_gtid;
5694
5695 if ( ! is_root ) {
5696
5697 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5698 /* Assume the threads are at the fork barrier here */
5699 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5700 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00005701 ANNOTATE_HAPPENS_BEFORE(thread);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005702 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5703 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005704 }; // if
5705
Jim Cownie5e8470a2013-09-27 10:38:44 +00005706 // Terminate OS thread.
5707 __kmp_reap_worker( thread );
5708
5709 //
5710 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005711 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005712 //
5713 // There is a small timing hole here - if the worker thread was
5714        // just waking up after sleeping in the pool, had reset its
5715 // th_active_in_pool flag but not decremented the global counter
5716 // __kmp_thread_pool_active_nth yet, then the global counter
5717 // might not get updated.
5718 //
5719 // Currently, this can only happen as the library is unloaded,
5720 // so there are no harmful side effects.
5721 //
5722 if ( thread->th.th_active_in_pool ) {
5723 thread->th.th_active_in_pool = FALSE;
5724 KMP_TEST_THEN_DEC32(
5725 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5726 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5727 }
5728
5729 // Decrement # of [worker] threads in the pool.
5730 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5731 --__kmp_thread_pool_nth;
5732 }; // if
5733
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005734 __kmp_free_implicit_task(thread);
5735
Jim Cownie5e8470a2013-09-27 10:38:44 +00005736 // Free the fast memory for tasking
5737 #if USE_FAST_MEMORY
5738 __kmp_free_fast_memory( thread );
5739 #endif /* USE_FAST_MEMORY */
5740
5741 __kmp_suspend_uninitialize_thread( thread );
5742
5743 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5744 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5745
5746 -- __kmp_all_nth;
5747 // __kmp_nth was decremented when thread is added to the pool.
5748
5749#ifdef KMP_ADJUST_BLOCKTIME
5750 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005751 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005752 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5753 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5754 if ( __kmp_nth <= __kmp_avail_proc ) {
5755 __kmp_zero_bt = FALSE;
5756 }
5757 }
5758#endif /* KMP_ADJUST_BLOCKTIME */
5759
5760 /* free the memory being used */
5761 if( __kmp_env_consistency_check ) {
5762 if ( thread->th.th_cons ) {
5763 __kmp_free_cons_stack( thread->th.th_cons );
5764 thread->th.th_cons = NULL;
5765 }; // if
5766 }
5767
5768 if ( thread->th.th_pri_common != NULL ) {
5769 __kmp_free( thread->th.th_pri_common );
5770 thread->th.th_pri_common = NULL;
5771 }; // if
5772
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005773 if (thread->th.th_task_state_memo_stack != NULL) {
5774 __kmp_free(thread->th.th_task_state_memo_stack);
5775 thread->th.th_task_state_memo_stack = NULL;
5776 }
5777
Jim Cownie5e8470a2013-09-27 10:38:44 +00005778 #if KMP_USE_BGET
5779 if ( thread->th.th_local.bget_data != NULL ) {
5780 __kmp_finalize_bget( thread );
5781 }; // if
5782 #endif
5783
Alp Toker98758b02014-03-02 04:12:06 +00005784#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005785 if ( thread->th.th_affin_mask != NULL ) {
5786 KMP_CPU_FREE( thread->th.th_affin_mask );
5787 thread->th.th_affin_mask = NULL;
5788 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005789#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005790
5791 __kmp_reap_team( thread->th.th_serial_team );
5792 thread->th.th_serial_team = NULL;
5793 __kmp_free( thread );
5794
5795 KMP_MB();
5796
5797} // __kmp_reap_thread
5798
5799static void
5800__kmp_internal_end(void)
5801{
5802 int i;
5803
5804 /* First, unregister the library */
5805 __kmp_unregister_library();
5806
5807 #if KMP_OS_WINDOWS
5808 /* In Win static library, we can't tell when a root actually dies, so we
5809 reclaim the data structures for any root threads that have died but not
5810 unregistered themselves, in order to shut down cleanly.
5811 In Win dynamic library we also can't tell when a thread dies.
5812 */
5813 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5814 #endif
5815
5816 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5817 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005818 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005819 break;
5820 KMP_MB(); /* Flush all pending memory write invalidates. */
5821 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5822
5823 if ( i < __kmp_threads_capacity ) {
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005824#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005825 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5826 KMP_MB(); /* Flush all pending memory write invalidates. */
5827
5828 //
5829 // Need to check that monitor was initialized before reaping it.
5830 // If we are called form __kmp_atfork_child (which sets
5831 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5832 // contain valid data, but it is only valid in the parent process,
5833 // not the child.
5834 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005835 // New behavior (201008): instead of keying off of the flag
5836 // __kmp_init_parallel, the monitor thread creation is keyed off
5837 // of the new flag __kmp_init_monitor.
5838 //
5839 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5840 if ( TCR_4( __kmp_init_monitor ) ) {
5841 __kmp_reap_monitor( & __kmp_monitor );
5842 TCW_4( __kmp_init_monitor, 0 );
5843 }
5844 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5845 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005846#endif // KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005847 } else {
5848 /* TODO move this to cleanup code */
5849 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005850 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005851 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5852 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005853// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005854 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005855 }
5856 }
5857 #endif
5858
5859 KMP_MB();
5860
5861 // Reap the worker threads.
5862 // This is valid for now, but be careful if threads are reaped sooner.
5863        while ( __kmp_thread_pool != NULL ) {    // Loop thru all the threads in the pool.
5864 // Get the next thread from the pool.
5865 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5866 __kmp_thread_pool = thread->th.th_next_pool;
5867 // Reap it.
Andrey Churbanov581490e2017-02-06 18:53:32 +00005868 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005869 thread->th.th_next_pool = NULL;
5870 thread->th.th_in_pool = FALSE;
5871 __kmp_reap_thread( thread, 0 );
5872 }; // while
5873 __kmp_thread_pool_insert_pt = NULL;
5874
5875 // Reap teams.
5876 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
5877 // Get the next team from the pool.
5878 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5879 __kmp_team_pool = team->t.t_next_pool;
5880 // Reap it.
5881 team->t.t_next_pool = NULL;
5882 __kmp_reap_team( team );
5883 }; // while
5884
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005885 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005886
5887 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5888 // TBD: Add some checking...
5889 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5890 }
5891
5892 /* Make sure all threadprivate destructors get run by joining with all worker
5893 threads before resetting this flag */
5894 TCW_SYNC_4(__kmp_init_common, FALSE);
5895
5896 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5897 KMP_MB();
5898
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005899#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005900 //
5901 // See note above: One of the possible fixes for CQ138434 / CQ140126
5902 //
5903 // FIXME: push both code fragments down and CSE them?
5904 // push them into __kmp_cleanup() ?
5905 //
5906 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5907 if ( TCR_4( __kmp_init_monitor ) ) {
5908 __kmp_reap_monitor( & __kmp_monitor );
5909 TCW_4( __kmp_init_monitor, 0 );
5910 }
5911 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5912 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005913#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005914 } /* else !__kmp_global.t_active */
5915 TCW_4(__kmp_init_gtid, FALSE);
5916 KMP_MB(); /* Flush all pending memory write invalidates. */
5917
Jim Cownie5e8470a2013-09-27 10:38:44 +00005918 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005919#if OMPT_SUPPORT
5920 ompt_fini();
5921#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005922}
5923
5924void
5925__kmp_internal_end_library( int gtid_req )
5926{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005927 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5928 /* this shouldn't be a race condition because __kmp_internal_end() is the
5929 * only place to clear __kmp_serial_init */
5930 /* we'll check this later too, after we get the lock */
5931    // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5932 // because the next check will work in any case.
5933 if( __kmp_global.g.g_abort ) {
5934 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5935 /* TODO abort? */
5936 return;
5937 }
5938 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5939 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5940 return;
5941 }
5942
5943
5944 KMP_MB(); /* Flush all pending memory write invalidates. */
5945
5946 /* find out who we are and what we should do */
5947 {
5948 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5949 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5950 if( gtid == KMP_GTID_SHUTDOWN ) {
5951 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5952 return;
5953 } else if( gtid == KMP_GTID_MONITOR ) {
5954 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5955 return;
5956 } else if( gtid == KMP_GTID_DNE ) {
5957 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5958 /* we don't know who we are, but we may still shutdown the library */
5959 } else if( KMP_UBER_GTID( gtid )) {
5960 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005961 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005962 __kmp_global.g.g_abort = -1;
5963 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5964 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5965 return;
5966 } else {
5967 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5968 __kmp_unregister_root_current_thread( gtid );
5969 }
5970 } else {
5971 /* worker threads may call this function through the atexit handler, if they call exit() */
5972 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5973 TODO: do a thorough shutdown instead
5974 */
5975 #ifdef DUMP_DEBUG_ON_EXIT
5976 if ( __kmp_debug_buf )
5977 __kmp_dump_debug_buffer( );
5978 #endif
5979 return;
5980 }
5981 }
5982 /* synchronize the termination process */
5983 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5984
5985 /* have we already finished */
5986 if( __kmp_global.g.g_abort ) {
5987 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5988 /* TODO abort? */
5989 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5990 return;
5991 }
5992 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5993 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5994 return;
5995 }
5996
5997    /* We need this lock to enforce mutual exclusion between this reading of
5998 __kmp_threads_capacity and the writing by __kmp_register_root.
5999 Alternatively, we can use a counter of roots that is
6000 atomically updated by __kmp_get_global_thread_id_reg,
6001 __kmp_do_serial_initialize and __kmp_internal_end_*.
6002 */
6003 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6004
6005 /* now we can safely conduct the actual termination */
6006 __kmp_internal_end();
6007
6008 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6009 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6010
6011 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
6012
6013 #ifdef DUMP_DEBUG_ON_EXIT
6014 if ( __kmp_debug_buf )
6015 __kmp_dump_debug_buffer();
6016 #endif
6017
6018 #if KMP_OS_WINDOWS
6019 __kmp_close_console();
6020 #endif
6021
6022 __kmp_fini_allocator();
6023
6024} // __kmp_internal_end_library
6025
6026void
6027__kmp_internal_end_thread( int gtid_req )
6028{
6029 int i;
6030
6031 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6032 /* this shouldn't be a race condition because __kmp_internal_end() is the
6033 * only place to clear __kmp_serial_init */
6034 /* we'll check this later too, after we get the lock */
6035 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
6036 // because the next check will work in any case.
6037 if( __kmp_global.g.g_abort ) {
6038 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
6039 /* TODO abort? */
6040 return;
6041 }
6042 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6043 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
6044 return;
6045 }
6046
6047 KMP_MB(); /* Flush all pending memory write invalidates. */
6048
6049 /* find out who we are and what we should do */
6050 {
6051 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6052 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6053 if( gtid == KMP_GTID_SHUTDOWN ) {
6054 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6055 return;
6056 } else if( gtid == KMP_GTID_MONITOR ) {
6057 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6058 return;
6059 } else if( gtid == KMP_GTID_DNE ) {
6060 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6061 return;
6062 /* we don't know who we are */
6063 } else if( KMP_UBER_GTID( gtid )) {
6064 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006065 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006066 __kmp_global.g.g_abort = -1;
6067 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6068 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6069 return;
6070 } else {
6071 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6072 __kmp_unregister_root_current_thread( gtid );
6073 }
6074 } else {
6075 /* just a worker thread, let's leave */
6076 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6077
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006078 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006079 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006080 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006081
6082 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6083 return;
6084 }
6085 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006086 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006087    // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber thread,
6088    // because it is better to shut down later in the library destructor.
6089    // The reason for this change is a performance problem when a non-OpenMP thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006090    // in a loop forks and joins many OpenMP threads. We can save a lot of time
6091 // keeping worker threads alive until the program shutdown.
6092 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6093 // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006094 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006095 return;
6096 #endif
6097 /* synchronize the termination process */
6098 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6099
6100 /* have we already finished */
6101 if( __kmp_global.g.g_abort ) {
6102 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6103 /* TODO abort? */
6104 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6105 return;
6106 }
6107 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6108 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6109 return;
6110 }
6111
6112    /* We need this lock to enforce mutual exclusion between this reading of
6113 __kmp_threads_capacity and the writing by __kmp_register_root.
6114 Alternatively, we can use a counter of roots that is
6115 atomically updated by __kmp_get_global_thread_id_reg,
6116 __kmp_do_serial_initialize and __kmp_internal_end_*.
6117 */
6118
6119 /* should we finish the run-time? are all siblings done? */
6120 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6121
6122 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6123 if ( KMP_UBER_GTID( i ) ) {
6124 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6125 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6126 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6127 return;
6128 };
6129 }
6130
6131 /* now we can safely conduct the actual termination */
6132
6133 __kmp_internal_end();
6134
6135 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6136 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6137
Jim Cownie77c2a632014-09-03 11:34:33 +00006138 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006139
6140 #ifdef DUMP_DEBUG_ON_EXIT
6141 if ( __kmp_debug_buf )
6142 __kmp_dump_debug_buffer();
6143 #endif
6144} // __kmp_internal_end_thread
6145
6146// -------------------------------------------------------------------------------------------------
6147// Library registration stuff.
6148
6149static long __kmp_registration_flag = 0;
6150 // Random value used to indicate library initialization.
6151static char * __kmp_registration_str = NULL;
6152 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6153
6154
6155static inline
6156char *
6157__kmp_reg_status_name() {
6158 /*
6159 On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
6160 If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
6161        the name of the registered_lib_env env var cannot be found, because the name will contain a different pid.
6162 */
6163 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6164} // __kmp_reg_status_name
6165
6166
6167void
6168__kmp_register_library_startup(
6169 void
6170) {
6171
6172 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6173 int done = 0;
6174 union {
6175 double dtime;
6176 long ltime;
6177 } time;
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006178 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie5e8470a2013-09-27 10:38:44 +00006179 __kmp_initialize_system_tick();
6180 #endif
6181 __kmp_read_system_time( & time.dtime );
6182 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6183 __kmp_registration_str =
6184 __kmp_str_format(
6185 "%p-%lx-%s",
6186 & __kmp_registration_flag,
6187 __kmp_registration_flag,
6188 KMP_LIBRARY_FILE
6189 );
6190
6191 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6192
6193 while ( ! done ) {
6194
6195 char * value = NULL; // Actual value of the environment variable.
6196
6197        // Set environment variable, but do not overwrite if it already exists.
6198 __kmp_env_set( name, __kmp_registration_str, 0 );
6199        // Check that the variable was written.
6200 value = __kmp_env_get( name );
6201 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6202
6203 done = 1; // Ok, environment variable set successfully, exit the loop.
6204
6205 } else {
6206
6207 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6208            // Check whether it is alive or dead.
6209 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6210 char * tail = value;
6211 char * flag_addr_str = NULL;
6212 char * flag_val_str = NULL;
6213 char const * file_name = NULL;
6214 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6215 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6216 file_name = tail;
6217 if ( tail != NULL ) {
6218 long * flag_addr = 0;
6219 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006220 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6221 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006222 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6223 // First, check whether environment-encoded address is mapped into addr space.
6224 // If so, dereference it to see if it still has the right value.
6225
6226 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6227 neighbor = 1;
6228 } else {
6229 // If not, then we know the other copy of the library is no longer running.
6230 neighbor = 2;
6231 }; // if
6232 }; // if
6233 }; // if
6234 switch ( neighbor ) {
6235 case 0 : // Cannot parse environment variable -- neighbor status unknown.
6236                // Assume it is the incompatible format of a future version of the library.
6237 // Assume the other library is alive.
6238 // WARN( ... ); // TODO: Issue a warning.
6239 file_name = "unknown library";
6240                // Attention! Falling through to the next case. That's intentional.
6241 case 1 : { // Neighbor is alive.
6242 // Check it is allowed.
6243 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6244 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6245 // That's not allowed. Issue fatal error.
6246 __kmp_msg(
6247 kmp_ms_fatal,
6248 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6249 KMP_HNT( DuplicateLibrary ),
6250 __kmp_msg_null
6251 );
6252 }; // if
6253 KMP_INTERNAL_FREE( duplicate_ok );
6254 __kmp_duplicate_library_ok = 1;
6255 done = 1; // Exit the loop.
6256 } break;
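                // Usage note (illustrative, not runtime logic): a user who deliberately loads
                // two OpenMP runtimes can suppress the fatal error above with, for example,
                //     export KMP_DUPLICATE_LIB_OK=TRUE
                // which makes the __kmp_str_match_true() check succeed and lets startup continue.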
6257 case 2 : { // Neighbor is dead.
6258 // Clear the variable and try to register library again.
6259 __kmp_env_unset( name );
6260 } break;
6261 default : {
6262 KMP_DEBUG_ASSERT( 0 );
6263 } break;
6264 }; // switch
6265
6266 }; // if
6267 KMP_INTERNAL_FREE( (void *) value );
6268
6269 }; // while
6270 KMP_INTERNAL_FREE( (void *) name );
6271
6272} // func __kmp_register_library_startup
6273
6274
6275void
6276__kmp_unregister_library( void ) {
6277
6278 char * name = __kmp_reg_status_name();
6279 char * value = __kmp_env_get( name );
6280
6281 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6282 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6283 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6284 // Ok, this is our variable. Delete it.
6285 __kmp_env_unset( name );
6286 }; // if
6287
6288 KMP_INTERNAL_FREE( __kmp_registration_str );
6289 KMP_INTERNAL_FREE( value );
6290 KMP_INTERNAL_FREE( name );
6291
6292 __kmp_registration_flag = 0;
6293 __kmp_registration_str = NULL;
6294
6295} // __kmp_unregister_library
6296
6297
6298// End of Library registration stuff.
6299// -------------------------------------------------------------------------------------------------
6300
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006301#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6302
6303static void __kmp_check_mic_type()
6304{
6305 kmp_cpuid_t cpuid_state = {0};
6306 kmp_cpuid_t * cs_p = &cpuid_state;
Jonathan Peyton7be075332015-06-22 15:53:50 +00006307 __kmp_x86_cpuid(1, 0, cs_p);
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006308 // We don't support mic1 at the moment
6309 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6310 __kmp_mic_type = mic2;
6311 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6312 __kmp_mic_type = mic3;
6313 } else {
6314 __kmp_mic_type = non_mic;
6315 }
6316}
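// Illustrative decoding of the masks above (assuming the standard CPUID leaf 1 EAX layout:
// stepping[3:0], model[7:4], family[11:8], extended model[19:16]):
//
//     unsigned family = (cs_p->eax >> 8) & 0xf;
//     unsigned model  = (((cs_p->eax >> 16) & 0xf) << 4) | ((cs_p->eax >> 4) & 0xf);
//     // (eax & 0xff0)   == 0xB10   -> family 0xB, model 0x01 (KNC, mic2)
//     // (eax & 0xf0ff0) == 0x50670 -> family 0x6, model 0x57 (KNL, mic3)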
6317
6318#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
6319
Jim Cownie5e8470a2013-09-27 10:38:44 +00006320static void
6321__kmp_do_serial_initialize( void )
6322{
6323 int i, gtid;
6324 int size;
6325
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006326 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006327
6328 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6329 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6330 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6331 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6332 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6333
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006334#if OMPT_SUPPORT
6335 ompt_pre_init();
6336#endif
6337
Jim Cownie5e8470a2013-09-27 10:38:44 +00006338 __kmp_validate_locks();
6339
6340 /* Initialize internal memory allocator */
6341 __kmp_init_allocator();
6342
6343 /* Register the library startup via an environment variable
6344 and check to see whether another copy of the library is already
6345 registered. */
6346
6347 __kmp_register_library_startup( );
6348
6349 /* TODO reinitialization of library */
6350 if( TCR_4(__kmp_global.g.g_done) ) {
6351 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6352 }
6353
6354 __kmp_global.g.g_abort = 0;
6355 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6356
6357 /* initialize the locks */
6358#if KMP_USE_ADAPTIVE_LOCKS
6359#if KMP_DEBUG_ADAPTIVE_LOCKS
6360 __kmp_init_speculative_stats();
6361#endif
6362#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006363#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00006364 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006365#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006366 __kmp_init_lock( & __kmp_global_lock );
6367 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6368 __kmp_init_lock( & __kmp_debug_lock );
6369 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6370 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6371 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6372 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6373 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6374 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6375 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6376 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6377 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6378 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6379 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6380 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6381 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6382 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6383 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006384#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006385 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006386#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006387 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6388
6389 /* conduct initialization and initial setup of configuration */
6390
6391 __kmp_runtime_initialize();
6392
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006393#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6394 __kmp_check_mic_type();
6395#endif
6396
Jim Cownie5e8470a2013-09-27 10:38:44 +00006397 // Some global variable initialization moved here from kmp_env_initialize()
6398#ifdef KMP_DEBUG
6399 kmp_diag = 0;
6400#endif
6401 __kmp_abort_delay = 0;
6402
6403 // From __kmp_init_dflt_team_nth()
6404 /* assume the entire machine will be used */
6405 __kmp_dflt_team_nth_ub = __kmp_xproc;
6406 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6407 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6408 }
6409 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6410 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6411 }
6412 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006413
6414 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6415 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006416#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006417 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6418 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006419#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006420 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6421 __kmp_library = library_throughput;
6422 // From KMP_SCHEDULE initialization
6423 __kmp_static = kmp_sch_static_balanced;
6424    // AC: do not use analytical here, because it is non-monotonic
6425 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006426    //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006427 // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method
6428 // control parts
6429 #if KMP_FAST_REDUCTION_BARRIER
6430 #define kmp_reduction_barrier_gather_bb ((int)1)
6431 #define kmp_reduction_barrier_release_bb ((int)1)
6432 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6433 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6434 #endif // KMP_FAST_REDUCTION_BARRIER
6435 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6436 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6437 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6438 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6439 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6440 #if KMP_FAST_REDUCTION_BARRIER
6441 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6442 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6443 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6444 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6445 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6446 }
6447 #endif // KMP_FAST_REDUCTION_BARRIER
6448 }
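    // Note (assumption about the encoding, consistent with the KMP_*_BARRIER settings
    // handling): the branch bits are the log2 of the barrier tree fan-out, so the value 1
    // chosen for the reduction barrier above corresponds to a 2-ary hyper barrier, while a
    // branch-bits value of 3 would give an 8-ary gather/release tree.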
6449 #if KMP_FAST_REDUCTION_BARRIER
6450 #undef kmp_reduction_barrier_release_pat
6451 #undef kmp_reduction_barrier_gather_pat
6452 #undef kmp_reduction_barrier_release_bb
6453 #undef kmp_reduction_barrier_gather_bb
6454 #endif // KMP_FAST_REDUCTION_BARRIER
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006455#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006456 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006457 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006458 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006459 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6460 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6461 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6462 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006463#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006464 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006465 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6466 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6467 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006468#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006469#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006470
6471 // From KMP_CHECKS initialization
6472#ifdef KMP_DEBUG
6473 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6474#else
6475 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6476#endif
6477
6478 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6479 __kmp_foreign_tp = TRUE;
6480
6481 __kmp_global.g.g_dynamic = FALSE;
6482 __kmp_global.g.g_dynamic_mode = dynamic_default;
6483
6484 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006485
Jim Cownie5e8470a2013-09-27 10:38:44 +00006486 // Print all messages in message catalog for testing purposes.
6487 #ifdef KMP_DEBUG
6488 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6489 if ( __kmp_str_match_true( val ) ) {
6490 kmp_str_buf_t buffer;
6491 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006492 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006493 __kmp_printf( "%s", buffer.str );
6494 __kmp_str_buf_free( & buffer );
6495 }; // if
6496 __kmp_env_free( & val );
6497 #endif
6498
Jim Cownie181b4bb2013-12-23 17:28:57 +00006499 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006500 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6501 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6502
Jim Cownie5e8470a2013-09-27 10:38:44 +00006503 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6504 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6505 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6506 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6507 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6508 __kmp_thread_pool = NULL;
6509 __kmp_thread_pool_insert_pt = NULL;
6510 __kmp_team_pool = NULL;
6511
6512 /* Allocate all of the variable sized records */
6513 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6514 /* Since allocation is cache-aligned, just add extra padding at the end */
6515 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6516 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6517 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6518
6519 /* init thread counts */
6520 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
6521 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
6522 __kmp_all_nth = 0;
6523 __kmp_nth = 0;
6524
6525 /* setup the uber master thread and hierarchy */
6526 gtid = __kmp_register_root( TRUE );
6527 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6528 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6529 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6530
6531 KMP_MB(); /* Flush all pending memory write invalidates. */
6532
6533 __kmp_common_initialize();
6534
6535 #if KMP_OS_UNIX
6536 /* invoke the child fork handler */
6537 __kmp_register_atfork();
6538 #endif
6539
Jonathan Peyton99016992015-05-26 17:32:53 +00006540 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006541 {
6542        /* Invoke the exit handler when the program finishes, only for the static library.
6543           For the dynamic library, we already have _fini and DllMain.
6544 */
6545 int rc = atexit( __kmp_internal_end_atexit );
6546 if ( rc != 0 ) {
6547 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6548 }; // if
6549 }
6550 #endif
6551
6552 #if KMP_HANDLE_SIGNALS
6553 #if KMP_OS_UNIX
6554 /* NOTE: make sure that this is called before the user installs
6555 * their own signal handlers so that the user handlers
6556         * are called first. This way they can return false,
6557 * not call our handler, avoid terminating the library,
6558 * and continue execution where they left off. */
6559 __kmp_install_signals( FALSE );
6560 #endif /* KMP_OS_UNIX */
6561 #if KMP_OS_WINDOWS
6562 __kmp_install_signals( TRUE );
6563 #endif /* KMP_OS_WINDOWS */
6564 #endif
6565
6566 /* we have finished the serial initialization */
6567 __kmp_init_counter ++;
6568
6569 __kmp_init_serial = TRUE;
6570
6571 if (__kmp_settings) {
6572 __kmp_env_print();
6573 }
6574
6575#if OMP_40_ENABLED
6576 if (__kmp_display_env || __kmp_display_env_verbose) {
6577 __kmp_env_print_2();
6578 }
6579#endif // OMP_40_ENABLED
6580
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006581#if OMPT_SUPPORT
6582 ompt_post_init();
6583#endif
6584
Jim Cownie5e8470a2013-09-27 10:38:44 +00006585 KMP_MB();
6586
6587 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6588}
6589
6590void
6591__kmp_serial_initialize( void )
6592{
6593 if ( __kmp_init_serial ) {
6594 return;
6595 }
6596 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6597 if ( __kmp_init_serial ) {
6598 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6599 return;
6600 }
6601 __kmp_do_serial_initialize();
6602 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6603}
6604
6605static void
6606__kmp_do_middle_initialize( void )
6607{
6608 int i, j;
6609 int prev_dflt_team_nth;
6610
6611 if( !__kmp_init_serial ) {
6612 __kmp_do_serial_initialize();
6613 }
6614
6615 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6616
6617 //
6618 // Save the previous value for the __kmp_dflt_team_nth so that
6619 // we can avoid some reinitialization if it hasn't changed.
6620 //
6621 prev_dflt_team_nth = __kmp_dflt_team_nth;
6622
Alp Toker98758b02014-03-02 04:12:06 +00006623#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006624 //
6625 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6626 // number of cores on the machine.
6627 //
6628 __kmp_affinity_initialize();
6629
6630 //
6631 // Run through the __kmp_threads array and set the affinity mask
6632 // for each root thread that is currently registered with the RTL.
6633 //
6634 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6635 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6636 __kmp_affinity_set_init_mask( i, TRUE );
6637 }
6638 }
Alp Toker98758b02014-03-02 04:12:06 +00006639#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006640
6641 KMP_ASSERT( __kmp_xproc > 0 );
6642 if ( __kmp_avail_proc == 0 ) {
6643 __kmp_avail_proc = __kmp_xproc;
6644 }
6645
6646    // If there were empty slots in the num_threads list (e.g. OMP_NUM_THREADS=,,2,3), fill them in now
6647 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006648 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006649 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6650 j++;
6651 }
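    // Illustrative example (derived from the loop above): with OMP_NUM_THREADS=,,2,3 and
    // __kmp_avail_proc == 8, the two leading empty slots become 8, so the effective nesting
    // list is 8,8,2,3 and __kmp_dflt_team_nth / __kmp_dflt_team_nth_ub are set to 8 as well.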
6652
6653 if ( __kmp_dflt_team_nth == 0 ) {
6654#ifdef KMP_DFLT_NTH_CORES
6655 //
6656 // Default #threads = #cores
6657 //
6658 __kmp_dflt_team_nth = __kmp_ncores;
6659 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6660 __kmp_dflt_team_nth ) );
6661#else
6662 //
6663 // Default #threads = #available OS procs
6664 //
6665 __kmp_dflt_team_nth = __kmp_avail_proc;
6666 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6667 __kmp_dflt_team_nth ) );
6668#endif /* KMP_DFLT_NTH_CORES */
6669 }
6670
6671 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6672 __kmp_dflt_team_nth = KMP_MIN_NTH;
6673 }
6674 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6675 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6676 }
6677
6678 //
6679 // There's no harm in continuing if the following check fails,
6680 // but it indicates an error in the previous logic.
6681 //
6682 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6683
6684 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6685 //
6686 // Run through the __kmp_threads array and set the num threads icv
6687 // for each root thread that is currently registered with the RTL
6688 // (which has not already explicitly set its nthreads-var with a
6689 // call to omp_set_num_threads()).
6690 //
6691 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6692 kmp_info_t *thread = __kmp_threads[ i ];
6693 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006694 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006695
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006696 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006697 }
6698 }
6699 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6700 __kmp_dflt_team_nth) );
6701
6702#ifdef KMP_ADJUST_BLOCKTIME
6703 /* Adjust blocktime to zero if necessary */
6704 /* now that __kmp_avail_proc is set */
6705 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6706 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6707 if ( __kmp_nth > __kmp_avail_proc ) {
6708 __kmp_zero_bt = TRUE;
6709 }
6710 }
6711#endif /* KMP_ADJUST_BLOCKTIME */
6712
6713 /* we have finished middle initialization */
6714 TCW_SYNC_4(__kmp_init_middle, TRUE);
6715
6716 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6717}
6718
6719void
6720__kmp_middle_initialize( void )
6721{
6722 if ( __kmp_init_middle ) {
6723 return;
6724 }
6725 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6726 if ( __kmp_init_middle ) {
6727 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6728 return;
6729 }
6730 __kmp_do_middle_initialize();
6731 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6732}
6733
6734void
6735__kmp_parallel_initialize( void )
6736{
6737 int gtid = __kmp_entry_gtid(); // this might be a new root
6738
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006739 /* synchronize parallel initialization (for sibling) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006740 if( TCR_4(__kmp_init_parallel) ) return;
6741 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6742 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6743
6744 /* TODO reinitialization after we have already shut down */
6745 if( TCR_4(__kmp_global.g.g_done) ) {
6746 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6747 __kmp_infinite_loop();
6748 }
6749
6750 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6751 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6752 */
6753 if( !__kmp_init_middle ) {
6754 __kmp_do_middle_initialize();
6755 }
6756
6757 /* begin initialization */
6758 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6759 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6760
6761#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6762 //
6763 // Save the FP control regs.
6764 // Worker threads will set theirs to these values at thread startup.
6765 //
6766 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6767 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6768 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6769#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6770
6771#if KMP_OS_UNIX
6772# if KMP_HANDLE_SIGNALS
6773 /* must be after __kmp_serial_initialize */
6774 __kmp_install_signals( TRUE );
6775# endif
6776#endif
6777
6778 __kmp_suspend_initialize();
6779
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006780#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006781 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6782 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6783 }
6784#else
6785 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6786 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6787 }
6788#endif
6789
6790 if ( __kmp_version ) {
6791 __kmp_print_version_2();
6792 }
6793
Jim Cownie5e8470a2013-09-27 10:38:44 +00006794 /* we have finished parallel initialization */
6795 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6796
6797 KMP_MB();
6798 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6799
6800 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6801}
6802
6803
6804/* ------------------------------------------------------------------------ */
6805
6806void
6807__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6808 kmp_team_t *team )
6809{
6810 kmp_disp_t *dispatch;
6811
6812 KMP_MB();
6813
6814 /* none of the threads have encountered any constructs, yet. */
6815 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006816#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006817 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006818#endif /* KMP_CACHE_MANAGE */
6819 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6820 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006821 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6822 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006823
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006824 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006825#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00006826 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6827#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006828 if( __kmp_env_consistency_check )
6829 __kmp_push_parallel( gtid, team->t.t_ident );
6830
6831 KMP_MB(); /* Flush all pending memory write invalidates. */
6832}
6833
6834void
6835__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6836 kmp_team_t *team )
6837{
6838 if( __kmp_env_consistency_check )
6839 __kmp_pop_parallel( gtid, team->t.t_ident );
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006840
6841 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006842}
6843
6844int
6845__kmp_invoke_task_func( int gtid )
6846{
6847 int rc;
6848 int tid = __kmp_tid_from_gtid( gtid );
6849 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006850 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006851
6852 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6853#if USE_ITT_BUILD
6854 if ( __itt_stack_caller_create_ptr ) {
6855 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6856 }
6857#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006858#if INCLUDE_SSC_MARKS
6859 SSC_MARK_INVOKING();
6860#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006861
6862#if OMPT_SUPPORT
6863 void *dummy;
6864 void **exit_runtime_p;
6865 ompt_task_id_t my_task_id;
6866 ompt_parallel_id_t my_parallel_id;
6867
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006868 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006869 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6870 ompt_task_info.frame.exit_runtime_frame);
6871 } else {
6872 exit_runtime_p = &dummy;
6873 }
6874
6875#if OMPT_TRACE
6876 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6877 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006878 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006879 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6880 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6881 my_parallel_id, my_task_id);
6882 }
6883#endif
6884#endif
6885
Jonathan Peyton45be4502015-08-11 21:36:41 +00006886 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006887 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6888 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006889 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6890 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006891#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006892 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006893#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006894 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006895#if OMPT_SUPPORT
6896 *exit_runtime_p = NULL;
6897#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006898 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006899
Jim Cownie5e8470a2013-09-27 10:38:44 +00006900#if USE_ITT_BUILD
6901 if ( __itt_stack_caller_create_ptr ) {
6902 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6903 }
6904#endif /* USE_ITT_BUILD */
6905 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6906
6907 return rc;
6908}
6909
6910#if OMP_40_ENABLED
6911void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006912__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006913{
6914    // This routine is called by all master threads in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006915 kmp_info_t *thr = __kmp_threads[ gtid ];
6916 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006917 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006918 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6919 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6920 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006921 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006922 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006923    // Launch the league of teams now, but do not let the workers execute
6924    // (they hang on the fork barrier until the next parallel region)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006925#if INCLUDE_SSC_MARKS
6926 SSC_MARK_FORKING();
6927#endif
6928 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006929 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006930#if OMPT_SUPPORT
6931 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6932#endif
6933 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006934 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6935 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006936#if INCLUDE_SSC_MARKS
6937 SSC_MARK_JOINING();
6938#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006939
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006940 // AC: last parameter "1" eliminates join barrier which won't work because
6941 // worker threads are in a fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006942 __kmp_join_call( loc, gtid
6943#if OMPT_SUPPORT
6944 , fork_context_intel
6945#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006946 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006947}
6948
6949int
6950__kmp_invoke_teams_master( int gtid )
6951{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006952 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6953 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006954 #if KMP_DEBUG
6955 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6956 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6957 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006958 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6959 __kmp_teams_master( gtid );
6960 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006961 return 1;
6962}
6963#endif /* OMP_40_ENABLED */
6964
6965/* this sets the requested number of threads for the next parallel region
6966 * encountered by this team */
6967/* since this should be enclosed in the forkjoin critical section, it
6968 * should avoid race conditions with asymmetric nested parallelism */
6969
6970void
6971__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6972{
6973 kmp_info_t *thr = __kmp_threads[gtid];
6974
6975 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006976 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006977}
6978
6979#if OMP_40_ENABLED
6980
6981/* this sets the requested number of teams for the teams region and/or
6982 * the number of threads for the next parallel region encountered */
6983void
6984__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6985{
6986 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006987 KMP_DEBUG_ASSERT(num_teams >= 0);
6988 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006989
6990 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006991 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006992 if( num_teams > __kmp_max_nth ) { // if too many teams requested?
6993 if ( !__kmp_reserve_warn ) {
6994 __kmp_reserve_warn = 1;
6995 __kmp_msg(
6996 kmp_ms_warning,
6997 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6998 KMP_HNT( Unset_ALL_THREADS ),
6999 __kmp_msg_null
7000 );
7001 }
7002 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007003 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007004 // Set number of teams (number of threads in the outer "parallel" of the teams)
7005 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7006
7007 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007008 if( num_threads == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007009 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007010 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007011 num_threads = __kmp_avail_proc / num_teams;
7012 if( num_teams * num_threads > __kmp_max_nth ) {
7013 // adjust num_threads w/o warning as it is not user setting
7014 num_threads = __kmp_max_nth / num_teams;
7015 }
7016 } else {
7017 if( num_teams * num_threads > __kmp_max_nth ) {
7018 int new_threads = __kmp_max_nth / num_teams;
7019 if ( !__kmp_reserve_warn ) { // user asked for too many threads
7020 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
7021 __kmp_msg(
7022 kmp_ms_warning,
7023 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
7024 KMP_HNT( Unset_ALL_THREADS ),
7025 __kmp_msg_null
7026 );
7027 }
7028 num_threads = new_threads;
7029 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007030 }
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007031 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007032}
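// Illustrative example (derived from the clamping logic above, with assumed limits
// __kmp_max_nth == 256 and __kmp_avail_proc == 64):
//     #pragma omp teams num_teams(8)                   // no thread_limit given
// yields th_teams_size = { nteams = 8, nth = 64 / 8 = 8 }, while
//     #pragma omp teams num_teams(8) thread_limit(64)
// exceeds __kmp_max_nth (8 * 64 > 256) and is trimmed to nth = 256 / 8 = 32 with a
// CantFormThrTeam warning.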
7033
7034
7035//
7036// Set the proc_bind var to use in the following parallel region.
7037//
7038void
7039__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
7040{
7041 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007042 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007043}
7044
7045#endif /* OMP_40_ENABLED */
7046
7047/* Launch the worker threads into the microtask. */
7048
7049void
7050__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7051{
7052 kmp_info_t *this_thr = __kmp_threads[gtid];
7053
7054#ifdef KMP_DEBUG
7055 int f;
7056#endif /* KMP_DEBUG */
7057
7058 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007059 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007060 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7061 KMP_MB(); /* Flush all pending memory write invalidates. */
7062
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007063 team->t.t_construct = 0; /* no single directives seen yet */
7064 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007065
7066 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007067 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007068 if ( team->t.t_max_nproc > 1 ) {
7069 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00007070 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007071 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007072#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007073 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7074#endif
7075 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007076 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007077 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007078#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007079 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7080#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007081 }
7082
7083 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007084 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007085
7086#ifdef KMP_DEBUG
7087 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7088 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7089 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7090 }
7091#endif /* KMP_DEBUG */
7092
7093 /* release the worker threads so they may begin working */
7094 __kmp_fork_barrier( gtid, 0 );
7095}
7096
7097
7098void
7099__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7100{
7101 kmp_info_t *this_thr = __kmp_threads[gtid];
7102
7103 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007104 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007105 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7106 KMP_MB(); /* Flush all pending memory write invalidates. */
7107
7108 /* Join barrier after fork */
7109
7110#ifdef KMP_DEBUG
7111 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7112 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7113 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7114 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7115 __kmp_print_structure();
7116 }
7117 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7118 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7119#endif /* KMP_DEBUG */
7120
7121 __kmp_join_barrier( gtid ); /* wait for everyone */
7122
7123 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007124 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007125}
7126
7127
7128/* ------------------------------------------------------------------------ */
7129/* ------------------------------------------------------------------------ */
7130
7131#ifdef USE_LOAD_BALANCE
7132
7133//
7134// Return the worker threads actively spinning in the hot team, if we
7135// are at the outermost level of parallelism. Otherwise, return 0.
7136//
7137static int
7138__kmp_active_hot_team_nproc( kmp_root_t *root )
7139{
7140 int i;
7141 int retval;
7142 kmp_team_t *hot_team;
7143
7144 if ( root->r.r_active ) {
7145 return 0;
7146 }
7147 hot_team = root->r.r_hot_team;
7148 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7149 return hot_team->t.t_nproc - 1; // Don't count master thread
7150 }
7151
7152 //
7153 // Skip the master thread - it is accounted for elsewhere.
7154 //
7155 retval = 0;
7156 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7157 if ( hot_team->t.t_threads[i]->th.th_active ) {
7158 retval++;
7159 }
7160 }
7161 return retval;
7162}
7163
7164//
7165// Perform an automatic adjustment to the number of
7166// threads used by the next parallel region.
7167//
7168static int
7169__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7170{
7171 int retval;
7172 int pool_active;
7173 int hot_team_active;
7174 int team_curr_active;
7175 int system_active;
7176
7177 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7178 root, set_nproc ) );
7179 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007180 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007181 KMP_DEBUG_ASSERT( set_nproc > 1 );
7182
7183 if ( set_nproc == 1) {
7184 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7185 return 1;
7186 }
7187
7188 //
7189 // Threads that are active in the thread pool, active in the hot team
7190 // for this particular root (if we are at the outer par level), and
7191 // the currently executing thread (to become the master) are available
7192 // to add to the new team, but are currently contributing to the system
7193 // load, and must be accounted for.
7194 //
7195 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7196 hot_team_active = __kmp_active_hot_team_nproc( root );
7197 team_curr_active = pool_active + hot_team_active + 1;
7198
7199 //
7200 // Check the system load.
7201 //
7202 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7203 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7204 system_active, pool_active, hot_team_active ) );
7205
7206 if ( system_active < 0 ) {
7207 //
7208 // There was an error reading the necessary info from /proc,
7209 // so use the thread limit algorithm instead. Once we set
7210 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7211 // we shouldn't wind up getting back here.
7212 //
7213 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7214 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7215
7216 //
7217 // Make this call behave like the thread limit algorithm.
7218 //
7219 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7220 : root->r.r_hot_team->t.t_nproc);
7221 if ( retval > set_nproc ) {
7222 retval = set_nproc;
7223 }
7224 if ( retval < KMP_MIN_NTH ) {
7225 retval = KMP_MIN_NTH;
7226 }
7227
7228 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7229 return retval;
7230 }
7231
7232 //
7233 // There is a slight delay in the load balance algorithm in detecting
7234 // new running procs. The real system load at this instant should be
7235    // at least as large as the #active OMP threads that are available to
7236 // add to the team.
7237 //
7238 if ( system_active < team_curr_active ) {
7239 system_active = team_curr_active;
7240 }
7241 retval = __kmp_avail_proc - system_active + team_curr_active;
7242 if ( retval > set_nproc ) {
7243 retval = set_nproc;
7244 }
7245 if ( retval < KMP_MIN_NTH ) {
7246 retval = KMP_MIN_NTH;
7247 }
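    // Illustrative example (derived from the formula above): with __kmp_avail_proc == 16,
    // a measured system_active of 10, and team_curr_active == 4 (three pool/hot-team threads
    // plus the would-be master), retval = 16 - 10 + 4 = 10; a request of set_nproc == 8 is
    // honored as 8, while set_nproc == 12 is trimmed down to 10.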
7248
7249 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7250 return retval;
7251} // __kmp_load_balance_nproc()
7252
7253#endif /* USE_LOAD_BALANCE */
7254
Jim Cownie5e8470a2013-09-27 10:38:44 +00007255/* ------------------------------------------------------------------------ */
7256/* ------------------------------------------------------------------------ */
7257
7258/* NOTE: this is called with the __kmp_init_lock held */
7259void
7260__kmp_cleanup( void )
7261{
7262 int f;
7263
7264 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7265
7266 if (TCR_4(__kmp_init_parallel)) {
7267#if KMP_HANDLE_SIGNALS
7268 __kmp_remove_signals();
7269#endif
7270 TCW_4(__kmp_init_parallel, FALSE);
7271 }
7272
7273 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007274#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007275 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007276#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007277 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007278 TCW_4(__kmp_init_middle, FALSE);
7279 }
7280
7281 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7282
7283 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007284 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007285 __kmp_init_serial = FALSE;
7286 }
7287
7288 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7289 if ( __kmp_root[ f ] != NULL ) {
7290 __kmp_free( __kmp_root[ f ] );
7291 __kmp_root[ f ] = NULL;
7292 }
7293 }
7294 __kmp_free( __kmp_threads );
7295 // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in
7296 // freeing __kmp_root.
7297 __kmp_threads = NULL;
7298 __kmp_root = NULL;
7299 __kmp_threads_capacity = 0;
7300
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007301#if KMP_USE_DYNAMIC_LOCK
7302 __kmp_cleanup_indirect_user_locks();
7303#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007304 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007305#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007306
Alp Toker98758b02014-03-02 04:12:06 +00007307 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007308 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7309 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007310 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007311
7312 #if KMP_USE_ADAPTIVE_LOCKS
7313 #if KMP_DEBUG_ADAPTIVE_LOCKS
7314 __kmp_print_speculative_stats();
7315 #endif
7316 #endif
7317 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7318 __kmp_nested_nth.nth = NULL;
7319 __kmp_nested_nth.size = 0;
7320 __kmp_nested_nth.used = 0;
Jonathan Peytond0365a22017-01-18 06:40:19 +00007321 KMP_INTERNAL_FREE( __kmp_nested_proc_bind.bind_types );
7322 __kmp_nested_proc_bind.bind_types = NULL;
7323 __kmp_nested_proc_bind.size = 0;
7324 __kmp_nested_proc_bind.used = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007325
7326 __kmp_i18n_catclose();
7327
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007328#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00007329 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007330#endif
7331
Jim Cownie5e8470a2013-09-27 10:38:44 +00007332 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7333}
7334
7335/* ------------------------------------------------------------------------ */
7336/* ------------------------------------------------------------------------ */
7337
7338int
7339__kmp_ignore_mppbeg( void )
7340{
7341 char *env;
7342
7343 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7344 if (__kmp_str_match_false( env ))
7345 return FALSE;
7346 }
7347 // By default __kmpc_begin() is no-op.
7348 return TRUE;
7349}
7350
7351int
7352__kmp_ignore_mppend( void )
7353{
7354 char *env;
7355
7356 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7357 if (__kmp_str_match_false( env ))
7358 return FALSE;
7359 }
7360 // By default __kmpc_end() is no-op.
7361 return TRUE;
7362}
7363
7364void
7365__kmp_internal_begin( void )
7366{
7367 int gtid;
7368 kmp_root_t *root;
7369
7370 /* this is a very important step as it will register new sibling threads
7371 * and assign these new uber threads a new gtid */
7372 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007373 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007374 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7375
7376 if( root->r.r_begin ) return;
7377 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7378 if( root->r.r_begin ) {
7379 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7380 return;
7381 }
7382
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007383 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007384
7385 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7386}
7387
7388
7389/* ------------------------------------------------------------------------ */
7390/* ------------------------------------------------------------------------ */
7391
7392void
7393__kmp_user_set_library (enum library_type arg)
7394{
7395 int gtid;
7396 kmp_root_t *root;
7397 kmp_info_t *thread;
7398
7399 /* first, make sure we are initialized so we can get our gtid */
7400
7401 gtid = __kmp_entry_gtid();
7402 thread = __kmp_threads[ gtid ];
7403
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007404 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007405
7406 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7407 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7408 KMP_WARNING( SetLibraryIncorrectCall );
7409 return;
7410 }
7411
7412 switch ( arg ) {
7413 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007414 thread->th.th_set_nproc = 0;
7415 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007416 break;
7417 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007418 thread->th.th_set_nproc = 0;
7419 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007420 break;
7421 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007422 thread->th.th_set_nproc = 0;
7423 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007424 break;
7425 default:
7426 KMP_FATAL( UnknownLibraryType, arg );
7427 }
7428
7429 __kmp_aux_set_library ( arg );
7430}
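// Usage note (illustrative): the three modes correspond to the KMP_LIBRARY environment
// variable and the kmp_set_library_*() entry points, e.g.
//     KMP_LIBRARY=throughput   # the default chosen in __kmp_do_serial_initialize()
//     KMP_LIBRARY=turnaround
//     KMP_LIBRARY=serial       # pins the team size at 1, as in the switch above
// all of which funnel into __kmp_user_set_library() / __kmp_aux_set_library().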
7431
7432void
7433__kmp_aux_set_stacksize( size_t arg )
7434{
7435 if (! __kmp_init_serial)
7436 __kmp_serial_initialize();
7437
7438#if KMP_OS_DARWIN
7439 if (arg & (0x1000 - 1)) {
7440 arg &= ~(0x1000 - 1);
7441 if(arg + 0x1000) /* check for overflow if we round up */
7442 arg += 0x1000;
7443 }
7444#endif
7445 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7446
7447 /* only change the default stacksize before the first parallel region */
7448 if (! TCR_4(__kmp_init_parallel)) {
7449 size_t value = arg; /* argument is in bytes */
7450
7451 if (value < __kmp_sys_min_stksize )
7452 value = __kmp_sys_min_stksize ;
7453 else if (value > KMP_MAX_STKSIZE)
7454 value = KMP_MAX_STKSIZE;
7455
7456 __kmp_stksize = value;
7457
7458 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7459 }
7460
7461 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7462}
7463
7464/* set the behaviour of the runtime library */
7465/* TODO this can cause some odd behaviour with sibling parallelism... */
7466void
7467__kmp_aux_set_library (enum library_type arg)
7468{
7469 __kmp_library = arg;
7470
7471 switch ( __kmp_library ) {
7472 case library_serial :
7473 {
7474 KMP_INFORM( LibraryIsSerial );
7475 (void) __kmp_change_library( TRUE );
7476 }
7477 break;
7478 case library_turnaround :
7479 (void) __kmp_change_library( TRUE );
7480 break;
7481 case library_throughput :
7482 (void) __kmp_change_library( FALSE );
7483 break;
7484 default:
7485 KMP_FATAL( UnknownLibraryType, arg );
7486 }
7487}
7488
7489/* ------------------------------------------------------------------------ */
7490/* ------------------------------------------------------------------------ */
7491
7492void
7493__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7494{
7495 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007496#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007497 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007498#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007499 int bt_set;
7500
7501 __kmp_save_internal_controls( thread );
7502
7503 /* Normalize and set blocktime for the teams */
7504 if (blocktime < KMP_MIN_BLOCKTIME)
7505 blocktime = KMP_MIN_BLOCKTIME;
7506 else if (blocktime > KMP_MAX_BLOCKTIME)
7507 blocktime = KMP_MAX_BLOCKTIME;
7508
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007509 set__blocktime_team( thread->th.th_team, tid, blocktime );
7510 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007511
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007512#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007513 /* Calculate and set blocktime intervals for the teams */
7514 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7515
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007516 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7517 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007518#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007519
7520 /* Set whether blocktime has been set to "TRUE" */
7521 bt_set = TRUE;
7522
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007523 set__bt_set_team( thread->th.th_team, tid, bt_set );
7524 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007525#if KMP_USE_MONITOR
Samuel Antao33515192016-10-20 13:20:17 +00007526 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
7527 "bt_intervals=%d, monitor_updates=%d\n",
7528 __kmp_gtid_from_tid(tid, thread->th.th_team),
7529 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7530 __kmp_monitor_wakeups));
7531#else
7532 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7533 __kmp_gtid_from_tid(tid, thread->th.th_team),
7534 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007535#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007536}
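// Usage note (illustrative; the public wrappers live elsewhere in the runtime): this routine
// ultimately services KMP_BLOCKTIME and kmp_set_blocktime(), e.g.
//     kmp_set_blocktime(0);      // put workers to sleep right after a region completes
//     kmp_set_blocktime(200);    // the KMP_DEFAULT_BLOCKTIME behavior: spin ~200 ms first
// with the value clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] as above.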
7537
7538void
7539__kmp_aux_set_defaults(
7540 char const * str,
7541 int len
7542) {
7543 if ( ! __kmp_init_serial ) {
7544 __kmp_serial_initialize();
7545 };
7546 __kmp_env_initialize( str );
7547
7548 if (__kmp_settings
7549#if OMP_40_ENABLED
7550 || __kmp_display_env || __kmp_display_env_verbose
7551#endif // OMP_40_ENABLED
7552 ) {
7553 __kmp_env_print();
7554 }
7555} // __kmp_aux_set_defaults
7556
7557/* ------------------------------------------------------------------------ */
7558
7559/*
7560 * internal fast reduction routines
7561 */
7562
Jim Cownie5e8470a2013-09-27 10:38:44 +00007563PACKED_REDUCTION_METHOD_T
7564__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7565 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7566 kmp_critical_name *lck )
7567{
7568
7569 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7570 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7571 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
7572 // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT.
7573
7574 PACKED_REDUCTION_METHOD_T retval;
7575
7576 int team_size;
7577
7578 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7579 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7580
7581 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7582 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7583
7584 retval = critical_reduce_block;
7585
7586    team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic dereference ) is slower
7587
7588 if( team_size == 1 ) {
7589
7590 retval = empty_reduce_block;
7591
7592 } else {
7593
7594 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7595 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7596
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007597 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007598
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007599 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007600
7601 int teamsize_cutoff = 4;
7602
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007603#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7604 if( __kmp_mic_type != non_mic ) {
7605 teamsize_cutoff = 8;
7606 }
7607#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007608 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007609 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007610 if ( atomic_available ) {
7611 retval = atomic_reduce_block;
7612 }
7613 } else {
7614 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7615 }
7616 } else if ( atomic_available ) {
7617 retval = atomic_reduce_block;
7618 }
7619 #else
7620 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007621 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007622
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007623 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007624
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007625 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007626
Jim Cownie5e8470a2013-09-27 10:38:44 +00007627 // basic tuning
7628
7629 if( atomic_available ) {
7630 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7631 retval = atomic_reduce_block;
7632 }
7633 } // otherwise: use critical section
7634
7635 #elif KMP_OS_DARWIN
7636
Jim Cownie5e8470a2013-09-27 10:38:44 +00007637 if( atomic_available && ( num_vars <= 3 ) ) {
7638 retval = atomic_reduce_block;
7639 } else if( tree_available ) {
7640 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7641 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7642 }
7643 } // otherwise: use critical section
7644
7645 #else
7646 #error "Unknown or unsupported OS"
7647 #endif
7648
7649 #else
7650 #error "Unknown or unsupported architecture"
7651 #endif
7652
7653 }
7654
Jim Cownie5e8470a2013-09-27 10:38:44 +00007655 // KMP_FORCE_REDUCTION
7656
Andrey Churbanovec23a952015-08-17 10:12:12 +00007657 // If the team is serialized (team_size == 1), ignore the forced reduction
7658 // method and stay with the unsynchronized method (empty_reduce_block)
7659 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007660
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007661 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007662
7663 int atomic_available, tree_available;
7664
7665 switch( ( forced_retval = __kmp_force_reduction_method ) )
7666 {
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007667 case critical_reduce_block:
Jim Cownie5e8470a2013-09-27 10:38:44 +00007668 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007669 break;
7670
7671 case atomic_reduce_block:
7672 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007673 if( ! atomic_available ) {
7674 KMP_WARNING(RedMethodNotSupported, "atomic");
7675 forced_retval = critical_reduce_block;
7676 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007677 break;
7678
7679 case tree_reduce_block:
7680 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007681 if( ! tree_available ) {
7682 KMP_WARNING(RedMethodNotSupported, "tree");
7683 forced_retval = critical_reduce_block;
7684 } else {
7685 #if KMP_FAST_REDUCTION_BARRIER
7686 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7687 #endif
7688 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007689 break;
7690
7691 default:
7692 KMP_ASSERT( 0 ); // "unsupported method specified"
7693 }
7694
7695 retval = forced_retval;
7696 }
7697
7698 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7699
7700 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7701 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7702
7703 return ( retval );
7704}
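// Illustrative example (derived from the selection logic above; x86_64 Linux, no
// KMP_FORCE_REDUCTION override): if the compiler provided reduce_data/reduce_func and set
// KMP_IDENT_ATOMIC_REDUCE, a 16-thread team exceeds the teamsize_cutoff (4, or 8 on MIC) and
// gets TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER, while a 2-thread team falls back to
// atomic_reduce_block.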
7705
7706// this function is for testing set/get/determine reduce method
7707kmp_int32
7708__kmp_get_reduce_method( void ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007709 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007710}
7711
7712/* ------------------------------------------------------------------------ */