Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
Jonathan Peytonde4749b2016-12-14 23:01:24 +00002 * kmp_runtime.cpp -- KPTS runtime support library
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_atomic.h"
18#include "kmp_wrapper_getpid.h"
19#include "kmp_environment.h"
20#include "kmp_itt.h"
21#include "kmp_str.h"
22#include "kmp_settings.h"
23#include "kmp_i18n.h"
24#include "kmp_io.h"
25#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000026#include "kmp_stats.h"
27#include "kmp_wait_release.h"
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +000028#include "kmp_affinity.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000029
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000030#if OMPT_SUPPORT
31#include "ompt-specific.h"
32#endif
33
Jim Cownie5e8470a2013-09-27 10:38:44 +000034/* these are temporary issues to be dealt with */
35#define KMP_USE_PRCTL 0
Jim Cownie5e8470a2013-09-27 10:38:44 +000036
Jim Cownie5e8470a2013-09-27 10:38:44 +000037#if KMP_OS_WINDOWS
38#include <process.h>
39#endif
40
Jonas Hahnfeld50fed042016-11-07 15:58:36 +000041#include "tsan_annotations.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000042
43#if defined(KMP_GOMP_COMPAT)
44char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
45#endif /* defined(KMP_GOMP_COMPAT) */
46
47char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
Jonathan Peytone844a542017-03-06 22:07:40 +000048#if OMP_50_ENABLED
49 "5.0 (201611)";
50#elif OMP_45_ENABLED
Jonathan Peyton74f3ffc2016-09-30 15:50:14 +000051 "4.5 (201511)";
52#elif OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +000053 "4.0 (201307)";
Jim Cownie5e8470a2013-09-27 10:38:44 +000054#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +000055 "3.1 (201107)";
Jim Cownie5e8470a2013-09-27 10:38:44 +000056#endif
57
58#ifdef KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +000059char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
Jim Cownie5e8470a2013-09-27 10:38:44 +000060#endif /* KMP_DEBUG */
61
Jim Cownie181b4bb2013-12-23 17:28:57 +000062#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
63
Jim Cownie5e8470a2013-09-27 10:38:44 +000064/* ------------------------------------------------------------------------ */
65/* ------------------------------------------------------------------------ */
66
67kmp_info_t __kmp_monitor;
68
69/* ------------------------------------------------------------------------ */
70/* ------------------------------------------------------------------------ */
71
72/* Forward declarations */
73
74void __kmp_cleanup( void );
75
76static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +000077static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
Jonathan Peyton2321d572015-06-08 19:25:25 +000078#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +000079static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
Jonathan Peyton2321d572015-06-08 19:25:25 +000080#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +000081static void __kmp_do_serial_initialize( void );
Jim Cownie4cc4bb42014-10-07 16:25:50 +000082void __kmp_fork_barrier( int gtid, int tid );
83void __kmp_join_barrier( int gtid );
84void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +000085
Jim Cownie5e8470a2013-09-27 10:38:44 +000086#ifdef USE_LOAD_BALANCE
87static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
88#endif
89
90static int __kmp_expand_threads(int nWish, int nNeed);
Jonathan Peyton2321d572015-06-08 19:25:25 +000091#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +000092static int __kmp_unregister_root_other_thread( int gtid );
Jonathan Peyton2321d572015-06-08 19:25:25 +000093#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +000094static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
95static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
96static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
97
98/* ------------------------------------------------------------------------ */
99/* ------------------------------------------------------------------------ */
100
101/* Calculate the identifier of the current thread */
102/* fast (and somewhat portable) way to get unique */
103/* identifier of executing thread. */
104/* returns KMP_GTID_DNE if we haven't been assigned a gtid */
105
106int
107__kmp_get_global_thread_id( )
108{
109 int i;
110 kmp_info_t **other_threads;
111 size_t stack_data;
112 char *stack_addr;
113 size_t stack_size;
114 char *stack_base;
115
116 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
117 __kmp_nth, __kmp_all_nth ));
118
119 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
 120 parallel region, we made this function return KMP_GTID_DNE to force serial_initialize by the
 121 caller. We had to handle KMP_GTID_DNE at all call sites, or else guarantee
122 __kmp_init_gtid for this to work. */
123
124 if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;
125
126#ifdef KMP_TDATA_GTID
127 if ( TCR_4(__kmp_gtid_mode) >= 3) {
128 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
129 return __kmp_gtid;
130 }
131#endif
132 if ( TCR_4(__kmp_gtid_mode) >= 2) {
133 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
134 return __kmp_gtid_get_specific();
135 }
136 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
137
138 stack_addr = (char*) & stack_data;
139 other_threads = __kmp_threads;
140
141 /*
142 ATT: The code below is a source of potential bugs due to unsynchronized access to
143 __kmp_threads array. For example:
144 1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
145 2. Current thread is suspended by OS.
146 3. Another thread unregisters and finishes (debug versions of free() may fill memory
147 with something like 0xEF).
148 4. Current thread is resumed.
149 5. Current thread reads junk from *thr.
150 TODO: Fix it.
151 --ln
152 */
153
154 for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
155
156 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
157 if( !thr ) continue;
158
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000159 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
160 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000161
162 /* stack grows down -- search through all of the active threads */
163
164 if( stack_addr <= stack_base ) {
165 size_t stack_diff = stack_base - stack_addr;
166
167 if( stack_diff <= stack_size ) {
168 /* The only way we can be closer than the allocated */
169 /* stack size is if we are running on this thread. */
170 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
171 return i;
172 }
173 }
174 }
175
 176 /* call __kmp_gtid_get_specific() to try to determine our gtid */
177 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
178 "thread, using TLS\n" ));
179 i = __kmp_gtid_get_specific();
180
181 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
182
 183 /* if we haven't been assigned a gtid, then return the (negative) code */
184 if( i<0 ) return i;
185
186 /* dynamically updated stack window for uber threads to avoid get_specific call */
187 if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
188 KMP_FATAL( StackOverflow, i );
189 }
190
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000191 stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000192 if( stack_addr > stack_base ) {
193 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
194 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
195 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
196 } else {
197 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
198 }
199
200 /* Reprint stack bounds for ubermaster since they have been refined */
201 if ( __kmp_storage_map ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000202 char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
203 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000204 __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000205 other_threads[i]->th.th_info.ds.ds_stacksize,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000206 "th_%d stack (refinement)", i );
207 }
208 return i;
209}
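/*
   Illustrative sketch (not part of the runtime): the lookup above answers
   "does this address lie on thread i's stack?" with a simple range test,
   assuming a downward-growing stack whose base (highest address) and size
   were recorded when the thread registered:

       char  *stack_addr;   // address of a local variable in the current thread
       char  *stack_base;   // recorded top of thread i's stack
       size_t stack_size;   // recorded size of thread i's stack
       int on_stack_i = ( stack_addr <= stack_base ) &&
                        ( (size_t)( stack_base - stack_addr ) <= stack_size );

   If no registered stack contains the address, the code falls back to the
   thread-specific-storage value and, for uber threads, widens the recorded
   stack window so later lookups can succeed without a get_specific() call.
*/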
210
211int
212__kmp_get_global_thread_id_reg( )
213{
214 int gtid;
215
216 if ( !__kmp_init_serial ) {
217 gtid = KMP_GTID_DNE;
218 } else
219#ifdef KMP_TDATA_GTID
220 if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
221 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
222 gtid = __kmp_gtid;
223 } else
224#endif
225 if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
226 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
227 gtid = __kmp_gtid_get_specific();
228 } else {
229 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
230 gtid = __kmp_get_global_thread_id();
231 }
232
233 /* we must be a new uber master sibling thread */
234 if( gtid == KMP_GTID_DNE ) {
235 KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
236 "Registering a new gtid.\n" ));
237 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
238 if( !__kmp_init_serial ) {
239 __kmp_do_serial_initialize();
240 gtid = __kmp_gtid_get_specific();
241 } else {
242 gtid = __kmp_register_root(FALSE);
243 }
244 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
245 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
246 }
247
248 KMP_DEBUG_ASSERT( gtid >=0 );
249
250 return gtid;
251}
252
253/* caller must hold forkjoin_lock */
254void
255__kmp_check_stack_overlap( kmp_info_t *th )
256{
257 int f;
258 char *stack_beg = NULL;
259 char *stack_end = NULL;
260 int gtid;
261
262 KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
263 if ( __kmp_storage_map ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000264 stack_end = (char *) th->th.th_info.ds.ds_stackbase;
265 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000266
267 gtid = __kmp_gtid_from_thread( th );
268
269 if (gtid == KMP_GTID_MONITOR) {
270 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
271 "th_%s stack (%s)", "mon",
272 ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
273 } else {
274 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
275 "th_%d stack (%s)", gtid,
276 ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
277 }
278 }
279
280 /* No point in checking ubermaster threads since they use refinement and cannot overlap */
Andrey Churbanovbebb5402015-03-03 16:19:57 +0000281 gtid = __kmp_gtid_from_thread( th );
282 if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
Jim Cownie5e8470a2013-09-27 10:38:44 +0000283 {
284 KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
285 if ( stack_beg == NULL ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000286 stack_end = (char *) th->th.th_info.ds.ds_stackbase;
287 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000288 }
289
290 for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
291 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
292
293 if( f_th && f_th != th ) {
294 char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
295 char *other_stack_beg = other_stack_end -
296 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
297 if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
298 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
299
300 /* Print the other stack values before the abort */
301 if ( __kmp_storage_map )
302 __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
303 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
304 "th_%d stack (overlapped)",
305 __kmp_gtid_from_thread( f_th ) );
306
307 __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
308 }
309 }
310 }
311 }
312 KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
313}
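/*
   Illustrative sketch (not part of the runtime): two stacks, each described by a
   half-open range [beg, end), are reported as overlapping when an endpoint of one
   falls strictly inside the other -- the same test used in the loop above:

       int overlaps = ( stack_beg > other_stack_beg && stack_beg < other_stack_end ) ||
                      ( stack_end > other_stack_beg && stack_end < other_stack_end );
*/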
314
315
316/* ------------------------------------------------------------------------ */
317
Jim Cownie5e8470a2013-09-27 10:38:44 +0000318/* ------------------------------------------------------------------------ */
319
320void
321__kmp_infinite_loop( void )
322{
323 static int done = FALSE;
324
325 while (! done) {
326 KMP_YIELD( 1 );
327 }
328}
329
330#define MAX_MESSAGE 512
331
332void
333__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
334 char buffer[MAX_MESSAGE];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000335 va_list ap;
336
337 va_start( ap, format);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000338 KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000339 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
340 __kmp_vprintf( kmp_err, buffer, ap );
341#if KMP_PRINT_DATA_PLACEMENT
Jonathan Peyton91b78702015-06-08 19:39:07 +0000342 int node;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000343 if(gtid >= 0) {
344 if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
345 if( __kmp_storage_map_verbose ) {
346 node = __kmp_get_host_node(p1);
347 if(node < 0) /* doesn't work, so don't try this next time */
348 __kmp_storage_map_verbose = FALSE;
349 else {
350 char *last;
351 int lastNode;
352 int localProc = __kmp_get_cpu_from_gtid(gtid);
353
Jonathan Peyton762bc462016-10-26 21:42:48 +0000354 const int page_size = KMP_GET_PAGE_SIZE();
355
356 p1 = (void *)( (size_t)p1 & ~((size_t)page_size - 1) );
357 p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)page_size - 1) );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000358 if(localProc >= 0)
359 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
360 else
361 __kmp_printf_no_lock(" GTID %d\n", gtid);
362# if KMP_USE_PRCTL
363/* The more elaborate format is disabled for now because of the prctl hanging bug. */
364 do {
365 last = p1;
366 lastNode = node;
367 /* This loop collates adjacent pages with the same host node. */
368 do {
Jonathan Peyton762bc462016-10-26 21:42:48 +0000369 (char*)p1 += page_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000370 } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
371 __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
372 (char*)p1 - 1, lastNode);
373 } while(p1 <= p2);
374# else
375 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
Jonathan Peyton762bc462016-10-26 21:42:48 +0000376 (char*)p1 + (page_size - 1), __kmp_get_host_node(p1));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000377 if(p1 < p2) {
378 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
Jonathan Peyton762bc462016-10-26 21:42:48 +0000379 (char*)p2 + (page_size - 1), __kmp_get_host_node(p2));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000380 }
381# endif
382 }
383 }
384 } else
385 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
386 }
387#endif /* KMP_PRINT_DATA_PLACEMENT */
388 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
389}
390
391void
392__kmp_warn( char const * format, ... )
393{
394 char buffer[MAX_MESSAGE];
395 va_list ap;
396
397 if ( __kmp_generate_warnings == kmp_warnings_off ) {
398 return;
399 }
400
401 va_start( ap, format );
402
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000403 KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000404 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
405 __kmp_vprintf( kmp_err, buffer, ap );
406 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
407
408 va_end( ap );
409}
410
411void
412__kmp_abort_process()
413{
414
415 // Later threads may stall here, but that's ok because abort() will kill them.
416 __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
417
418 if ( __kmp_debug_buf ) {
419 __kmp_dump_debug_buffer();
420 }; // if
421
422 if ( KMP_OS_WINDOWS ) {
423 // Let other threads know of abnormal termination and prevent deadlock
424 // if abort happened during library initialization or shutdown
425 __kmp_global.g.g_abort = SIGABRT;
426
427 /*
 428 On Windows* OS, by default abort() causes a pop-up error box, which stalls nightly testing.
429 Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
 430 works well, but this function is not available in VS7 (this is not a problem for the DLL, but
431 it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does
432 not help, at least in some versions of MS C RTL.
433
 434 It seems the following sequence is the only way to simulate abort() and avoid the pop-up error
435 box.
436 */
437 raise( SIGABRT );
438 _exit( 3 ); // Just in case, if signal ignored, exit anyway.
439 } else {
440 abort();
441 }; // if
442
443 __kmp_infinite_loop();
444 __kmp_release_bootstrap_lock( & __kmp_exit_lock );
445
446} // __kmp_abort_process
447
448void
449__kmp_abort_thread( void )
450{
451 // TODO: Eliminate g_abort global variable and this function.
 452 // In case of abort, just call abort(); it will kill all the threads.
453 __kmp_infinite_loop();
454} // __kmp_abort_thread
455
456/* ------------------------------------------------------------------------ */
457
458/*
459 * Print out the storage map for the major kmp_info_t thread data structures
460 * that are allocated together.
461 */
462
463static void
464__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
465{
466 __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
467
468 __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
469 "th_%d.th_info", gtid );
470
471 __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
472 "th_%d.th_local", gtid );
473
474 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
475 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
476
477 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
478 &thr->th.th_bar[bs_plain_barrier+1],
479 sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
480
481 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
482 &thr->th.th_bar[bs_forkjoin_barrier+1],
483 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
484
485 #if KMP_FAST_REDUCTION_BARRIER
486 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
487 &thr->th.th_bar[bs_reduction_barrier+1],
488 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
489 #endif // KMP_FAST_REDUCTION_BARRIER
490}
491
492/*
493 * Print out the storage map for the major kmp_team_t team data structures
494 * that are allocated together.
495 */
496
497static void
498__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
499{
Jonathan Peyton067325f2016-05-31 19:01:15 +0000500 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000501 __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
502 header, team_id );
503
504 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
505 sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
506
507
508 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
509 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
510
511 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
512 sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
513
514 #if KMP_FAST_REDUCTION_BARRIER
515 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
516 sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
517 #endif // KMP_FAST_REDUCTION_BARRIER
518
519 __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
520 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
521
522 __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
523 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
524
525 __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
526 sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
527 header, team_id );
528
Jim Cownie5e8470a2013-09-27 10:38:44 +0000529
530 __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
531 sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
532}
533
534static void __kmp_init_allocator() {}
535static void __kmp_fini_allocator() {}
Jim Cownie5e8470a2013-09-27 10:38:44 +0000536
537/* ------------------------------------------------------------------------ */
538
Jonathan Peyton99016992015-05-26 17:32:53 +0000539#ifdef KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +0000540# if KMP_OS_WINDOWS
541
Jim Cownie5e8470a2013-09-27 10:38:44 +0000542static void
543__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
544 // TODO: Change to __kmp_break_bootstrap_lock().
545 __kmp_init_bootstrap_lock( lck ); // make the lock released
546}
547
548static void
549__kmp_reset_locks_on_process_detach( int gtid_req ) {
550 int i;
551 int thread_count;
552
553 // PROCESS_DETACH is expected to be called by a thread
554 // that executes ProcessExit() or FreeLibrary().
 555 // The OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
 556 // So, it might be safe to access the __kmp_threads[] array without taking the forkjoin_lock.
 557 // However, in fact, some threads may still be alive here, although they are about to be terminated.
 558 // The threads in the array with ds_thread==0 are the most suspicious.
 559 // So it may not actually be safe to access __kmp_threads[].
560
561 // TODO: does it make sense to check __kmp_roots[] ?
562
 563 // Let's check that there are no other live threads registered with the OpenMP library.
564 while( 1 ) {
565 thread_count = 0;
566 for( i = 0; i < __kmp_threads_capacity; ++i ) {
567 if( !__kmp_threads ) continue;
568 kmp_info_t* th = __kmp_threads[ i ];
569 if( th == NULL ) continue;
570 int gtid = th->th.th_info.ds.ds_gtid;
571 if( gtid == gtid_req ) continue;
572 if( gtid < 0 ) continue;
573 DWORD exit_val;
574 int alive = __kmp_is_thread_alive( th, &exit_val );
575 if( alive ) {
576 ++thread_count;
577 }
578 }
579 if( thread_count == 0 ) break; // success
580 }
581
582 // Assume that I'm alone.
583
 584 // Now it is probably safe to check and reset the locks.
585 // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
586 __kmp_reset_lock( &__kmp_forkjoin_lock );
587 #ifdef KMP_DEBUG
588 __kmp_reset_lock( &__kmp_stdio_lock );
589 #endif // KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +0000590}
591
592BOOL WINAPI
593DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
594 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
595
596 switch( fdwReason ) {
597
598 case DLL_PROCESS_ATTACH:
599 KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));
600
601 return TRUE;
602
603 case DLL_PROCESS_DETACH:
604 KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
605 __kmp_gtid_get_specific() ));
606
607 if( lpReserved != NULL )
608 {
609 // lpReserved is used for telling the difference:
610 // lpReserved == NULL when FreeLibrary() was called,
611 // lpReserved != NULL when the process terminates.
612 // When FreeLibrary() is called, worker threads remain alive.
613 // So they will release the forkjoin lock by themselves.
 614 // When the process terminates, worker threads disappear, triggering
 615 // the problem of an unreleased forkjoin lock, as described below.
616
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +0000617 // A worker thread can take the forkjoin lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000618 // The problem comes up if that worker thread becomes dead
619 // before it releases the forkjoin lock.
620 // The forkjoin lock remains taken, while the thread
621 // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
622 // will try to take the forkjoin lock and will always fail,
623 // so that the application will never finish [normally].
624 // This scenario is possible if __kmpc_end() has not been executed.
625 // It looks like it's not a corner case, but common cases:
626 // - the main function was compiled by an alternative compiler;
627 // - the main function was compiled by icl but without /Qopenmp (application with plugins);
628 // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
 629 // - a live foreign thread prevented __kmpc_end() from doing cleanup.
630
631 // This is a hack to work around the problem.
632 // TODO: !!! to figure out something better.
633 __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
634 }
635
636 __kmp_internal_end_library( __kmp_gtid_get_specific() );
637
638 return TRUE;
639
640 case DLL_THREAD_ATTACH:
641 KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));
642
643 /* if we wanted to register new siblings all the time here call
644 * __kmp_get_gtid(); */
645 return TRUE;
646
647 case DLL_THREAD_DETACH:
648 KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
649 __kmp_gtid_get_specific() ));
650
651 __kmp_internal_end_thread( __kmp_gtid_get_specific() );
652 return TRUE;
653 }
654
655 return TRUE;
656}
657
658# endif /* KMP_OS_WINDOWS */
Jonathan Peyton99016992015-05-26 17:32:53 +0000659#endif /* KMP_DYNAMIC_LIB */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000660
661
662/* ------------------------------------------------------------------------ */
663
664/* Change the library type to "status" and return the old type */
665/* called from within initialization routines where __kmp_initz_lock is held */
666int
667__kmp_change_library( int status )
668{
669 int old_status;
670
671 old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count)
672
673 if (status) {
674 __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
675 }
676 else {
677 __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
678 }
679
680 return old_status; // return previous setting of whether KMP_LIBRARY=throughput
681}
682
683/* ------------------------------------------------------------------------ */
684/* ------------------------------------------------------------------------ */
685
686/* __kmp_parallel_deo --
687 * Wait until it's our turn.
688 */
689void
690__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
691{
692 int gtid = *gtid_ref;
693#ifdef BUILD_PARALLEL_ORDERED
694 kmp_team_t *team = __kmp_team_from_gtid( gtid );
695#endif /* BUILD_PARALLEL_ORDERED */
696
697 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000698 if( __kmp_threads[gtid]->th.th_root->r.r_active )
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000699#if KMP_USE_DYNAMIC_LOCK
700 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
701#else
Jim Cownie5e8470a2013-09-27 10:38:44 +0000702 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000703#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000704 }
705#ifdef BUILD_PARALLEL_ORDERED
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000706 if( !team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000707 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000708 KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000709 KMP_MB();
710 }
711#endif /* BUILD_PARALLEL_ORDERED */
712}
713
714/* __kmp_parallel_dxo --
715 * Signal the next task.
716 */
717
718void
719__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
720{
721 int gtid = *gtid_ref;
722#ifdef BUILD_PARALLEL_ORDERED
723 int tid = __kmp_tid_from_gtid( gtid );
724 kmp_team_t *team = __kmp_team_from_gtid( gtid );
725#endif /* BUILD_PARALLEL_ORDERED */
726
727 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000728 if( __kmp_threads[gtid]->th.th_root->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000729 __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
730 }
731#ifdef BUILD_PARALLEL_ORDERED
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000732 if ( ! team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000733 KMP_MB(); /* Flush all pending memory write invalidates. */
734
735 /* use the tid of the next thread in this team */
 736 /* TODO replace with a general release procedure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000737 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000738
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000739#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000740 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000741 ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
742 /* accept blame for "ordered" waiting */
743 kmp_info_t *this_thread = __kmp_threads[gtid];
744 ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
745 this_thread->th.ompt_thread_info.wait_id);
746 }
747#endif
748
Jim Cownie5e8470a2013-09-27 10:38:44 +0000749 KMP_MB(); /* Flush all pending memory write invalidates. */
750 }
751#endif /* BUILD_PARALLEL_ORDERED */
752}
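/*
   Illustrative sketch (not part of the runtime): with BUILD_PARALLEL_ORDERED the
   team shares a single turn counter, team->t.t_ordered.dt.t_value, holding the tid
   whose turn it is. __kmp_parallel_deo() spins until the counter equals the
   caller's tid, and __kmp_parallel_dxo() passes the turn to the next thread:

       while ( team->t.t_ordered.dt.t_value != tid )
           ;                                              // wait for my turn (conceptually)
       // ... body of the ordered region ...
       team->t.t_ordered.dt.t_value = ( tid + 1 ) % team->t.t_nproc;  // release the next thread

   so the threads of the team enter their "ordered" regions in tid order.
*/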
753
754/* ------------------------------------------------------------------------ */
755/* ------------------------------------------------------------------------ */
756
757/* ------------------------------------------------------------------------ */
758/* ------------------------------------------------------------------------ */
759
760/* The BARRIER for a SINGLE process section is always explicit */
761
762int
763__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
764{
765 int status;
766 kmp_info_t *th;
767 kmp_team_t *team;
768
769 if( ! TCR_4(__kmp_init_parallel) )
770 __kmp_parallel_initialize();
771
772 th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000773 team = th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000774 status = 0;
775
776 th->th.th_ident = id_ref;
777
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000778 if ( team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000779 status = 1;
780 } else {
781 kmp_int32 old_this = th->th.th_local.this_construct;
782
783 ++th->th.th_local.this_construct;
784 /* try to set team count to thread count--success means thread got the
785 single block
786 */
787 /* TODO: Should this be acquire or release? */
Jonathan Peytonc1666962016-07-01 17:37:49 +0000788 if (team->t.t_construct == old_this) {
789 status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
790 th->th.th_local.this_construct);
791 }
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000792#if USE_ITT_BUILD
793 if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
794#if OMP_40_ENABLED
795 th->th.th_teams_microtask == NULL &&
796#endif
797 team->t.t_active_level == 1 )
798 { // Only report metadata by master of active team at level 1
799 __kmp_itt_metadata_single( id_ref );
800 }
801#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000802 }
803
804 if( __kmp_env_consistency_check ) {
805 if (status && push_ws) {
806 __kmp_push_workshare( gtid, ct_psingle, id_ref );
807 } else {
808 __kmp_check_workshare( gtid, ct_psingle, id_ref );
809 }
810 }
811#if USE_ITT_BUILD
812 if ( status ) {
813 __kmp_itt_single_start( gtid );
814 }
815#endif /* USE_ITT_BUILD */
816 return status;
817}
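/*
   Illustrative sketch (not part of the runtime): the "single" winner above is chosen
   by a compare-and-swap race on the team counter t_construct. Each thread advances
   its private this_construct, and the first one to move the shared counter forward
   executes the block:

       kmp_int32 old_this = th->th.th_local.this_construct++;        // conceptual
       int won = KMP_COMPARE_AND_STORE_ACQ32( &team->t.t_construct,
                                              old_this, old_this + 1 );

   Losers skip the block and, unless nowait was specified, meet the winner at the
   construct's implicit barrier.
*/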
818
819void
820__kmp_exit_single( int gtid )
821{
822#if USE_ITT_BUILD
823 __kmp_itt_single_end( gtid );
824#endif /* USE_ITT_BUILD */
825 if( __kmp_env_consistency_check )
826 __kmp_pop_workshare( gtid, ct_psingle, NULL );
827}
828
829
Jim Cownie5e8470a2013-09-27 10:38:44 +0000830/*
831 * determine if we can go parallel or must use a serialized parallel region and
832 * how many threads we can use
 833 * set_nthreads is the number of threads requested for the team
 834 * returns 1 if we should serialize or only use one thread,
835 * otherwise the number of threads to use
836 * The forkjoin lock is held by the caller.
837 */
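/*
   Illustrative example (assumed numbers, not from this file): under
   __kmp_global.g.g_dynamic_mode == dynamic_thread_limit the reservation below is
   bounded by the free processors plus the slots this root already owns:

       new_nthreads = __kmp_avail_proc - __kmp_nth
                      + ( root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc );

   e.g. 16 available procs, 10 library threads already running, an active root
   => at most 16 - 10 + 1 = 7 threads are granted, however many were requested.
*/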
838static int
839__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
840 int master_tid, int set_nthreads
841#if OMP_40_ENABLED
842 , int enter_teams
843#endif /* OMP_40_ENABLED */
844)
845{
846 int capacity;
847 int new_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000848 KMP_DEBUG_ASSERT( __kmp_init_serial );
849 KMP_DEBUG_ASSERT( root && parent_team );
850
851 //
Jim Cownie5e8470a2013-09-27 10:38:44 +0000852 // If dyn-var is set, dynamically adjust the number of desired threads,
853 // according to the method specified by dynamic_mode.
854 //
855 new_nthreads = set_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000856 if ( ! get__dynamic_2( parent_team, master_tid ) ) {
857 ;
858 }
859#ifdef USE_LOAD_BALANCE
860 else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
861 new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
862 if ( new_nthreads == 1 ) {
863 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
864 master_tid ));
865 return 1;
866 }
867 if ( new_nthreads < set_nthreads ) {
868 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
869 master_tid, new_nthreads ));
870 }
871 }
872#endif /* USE_LOAD_BALANCE */
873 else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
874 new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
875 : root->r.r_hot_team->t.t_nproc);
876 if ( new_nthreads <= 1 ) {
877 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
878 master_tid ));
879 return 1;
880 }
881 if ( new_nthreads < set_nthreads ) {
882 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
883 master_tid, new_nthreads ));
884 }
885 else {
886 new_nthreads = set_nthreads;
887 }
888 }
889 else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
890 if ( set_nthreads > 2 ) {
891 new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
892 new_nthreads = ( new_nthreads % set_nthreads ) + 1;
893 if ( new_nthreads == 1 ) {
894 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
895 master_tid ));
896 return 1;
897 }
898 if ( new_nthreads < set_nthreads ) {
899 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
900 master_tid, new_nthreads ));
901 }
902 }
903 }
904 else {
905 KMP_ASSERT( 0 );
906 }
907
908 //
909 // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
910 //
911 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
912 root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
913 int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
914 root->r.r_hot_team->t.t_nproc );
915 if ( tl_nthreads <= 0 ) {
916 tl_nthreads = 1;
917 }
918
919 //
920 // If dyn-var is false, emit a 1-time warning.
921 //
922 if ( ! get__dynamic_2( parent_team, master_tid )
923 && ( ! __kmp_reserve_warn ) ) {
924 __kmp_reserve_warn = 1;
925 __kmp_msg(
926 kmp_ms_warning,
927 KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
928 KMP_HNT( Unset_ALL_THREADS ),
929 __kmp_msg_null
930 );
931 }
932 if ( tl_nthreads == 1 ) {
933 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
934 master_tid ));
935 return 1;
936 }
937 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
938 master_tid, tl_nthreads ));
939 new_nthreads = tl_nthreads;
940 }
941
Jim Cownie5e8470a2013-09-27 10:38:44 +0000942 //
943 // Check if the threads array is large enough, or needs expanding.
944 //
945 // See comment in __kmp_register_root() about the adjustment if
946 // __kmp_threads[0] == NULL.
947 //
948 capacity = __kmp_threads_capacity;
949 if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
950 --capacity;
951 }
952 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
953 root->r.r_hot_team->t.t_nproc ) > capacity ) {
954 //
955 // Expand the threads array.
956 //
957 int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
958 root->r.r_hot_team->t.t_nproc ) - capacity;
959 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
960 if ( slotsAdded < slotsRequired ) {
961 //
962 // The threads array was not expanded enough.
963 //
964 new_nthreads -= ( slotsRequired - slotsAdded );
965 KMP_ASSERT( new_nthreads >= 1 );
966
967 //
968 // If dyn-var is false, emit a 1-time warning.
969 //
970 if ( ! get__dynamic_2( parent_team, master_tid )
971 && ( ! __kmp_reserve_warn ) ) {
972 __kmp_reserve_warn = 1;
973 if ( __kmp_tp_cached ) {
974 __kmp_msg(
975 kmp_ms_warning,
976 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
977 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
978 KMP_HNT( PossibleSystemLimitOnThreads ),
979 __kmp_msg_null
980 );
981 }
982 else {
983 __kmp_msg(
984 kmp_ms_warning,
985 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
986 KMP_HNT( SystemLimitOnThreads ),
987 __kmp_msg_null
988 );
989 }
990 }
991 }
992 }
993
994 if ( new_nthreads == 1 ) {
995 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
996 __kmp_get_gtid(), set_nthreads ) );
997 return 1;
998 }
999
1000 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
1001 __kmp_get_gtid(), new_nthreads, set_nthreads ));
1002 return new_nthreads;
1003}
1004
1005/* ------------------------------------------------------------------------ */
1006/* ------------------------------------------------------------------------ */
1007
1008/* allocate threads from the thread pool and assign them to the new team */
1009/* we are assured that there are enough threads available, because we
 1010 * checked that earlier within the forkjoin critical section */
1011
1012static void
1013__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
1014 kmp_info_t *master_th, int master_gtid )
1015{
1016 int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001017 int use_hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001018
1019 KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
1020 KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
1021 KMP_MB();
1022
 1023 /* first, let's set up the master thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001024 master_th->th.th_info.ds.ds_tid = 0;
1025 master_th->th.th_team = team;
1026 master_th->th.th_team_nproc = team->t.t_nproc;
1027 master_th->th.th_team_master = master_th;
1028 master_th->th.th_team_serialized = FALSE;
1029 master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001030
1031 /* make sure we are not the optimized hot team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001032#if KMP_NESTED_HOT_TEAMS
1033 use_hot_team = 0;
1034 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
1035 if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
1036 int level = team->t.t_active_level - 1; // index in array of hot teams
1037 if( master_th->th.th_teams_microtask ) { // are we inside the teams?
1038 if( master_th->th.th_teams_size.nteams > 1 ) {
1039 ++level; // level was not increased in teams construct for team_of_masters
1040 }
1041 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1042 master_th->th.th_teams_level == team->t.t_level ) {
1043 ++level; // level was not increased in teams construct for team_of_workers before the parallel
1044 } // team->t.t_level will be increased inside parallel
1045 }
1046 if( level < __kmp_hot_teams_max_level ) {
1047 if( hot_teams[level].hot_team ) {
1048 // hot team has already been allocated for given level
1049 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
1050 use_hot_team = 1; // the team is ready to use
1051 } else {
1052 use_hot_team = 0; // AC: threads are not allocated yet
1053 hot_teams[level].hot_team = team; // remember new hot team
1054 hot_teams[level].hot_team_nth = team->t.t_nproc;
1055 }
1056 } else {
1057 use_hot_team = 0;
1058 }
1059 }
1060#else
1061 use_hot_team = team == root->r.r_hot_team;
1062#endif
1063 if ( !use_hot_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001064
1065 /* install the master thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001066 team->t.t_threads[ 0 ] = master_th;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001067 __kmp_initialize_info( master_th, team, 0, master_gtid );
1068
1069 /* now, install the worker threads */
1070 for ( i=1 ; i < team->t.t_nproc ; i++ ) {
1071
1072 /* fork or reallocate a new thread and install it in team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001073 kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
1074 team->t.t_threads[ i ] = thr;
1075 KMP_DEBUG_ASSERT( thr );
1076 KMP_DEBUG_ASSERT( thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001077 /* align team and thread arrived states */
Jonathan Peytond26e2132015-09-10 18:44:30 +00001078 KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001079 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
1080 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
1081 team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
1082 team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001083#if OMP_40_ENABLED
1084 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1085 thr->th.th_teams_level = master_th->th.th_teams_level;
1086 thr->th.th_teams_size = master_th->th.th_teams_size;
1087#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001088 { // Initialize threads' barrier data.
1089 int b;
1090 kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
1091 for ( b = 0; b < bs_last_barrier; ++ b ) {
1092 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001093 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001094#if USE_DEBUGGER
1095 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
1096#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001097 }; // for b
1098 }
1099 }
1100
Alp Toker98758b02014-03-02 04:12:06 +00001101#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001102 __kmp_partition_places( team );
1103#endif
1104
1105 }
1106
1107 KMP_MB();
1108}
1109
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001110#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1111//
1112// Propagate any changes to the floating point control registers out to the team
1113// We try to avoid unnecessary writes to the relevant cache line in the team structure,
1114// so we don't make changes unless they are needed.
1115//
1116inline static void
1117propagateFPControl(kmp_team_t * team)
1118{
1119 if ( __kmp_inherit_fp_control ) {
1120 kmp_int16 x87_fpu_control_word;
1121 kmp_uint32 mxcsr;
1122
1123 // Get master values of FPU control flags (both X87 and vector)
1124 __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
1125 __kmp_store_mxcsr( &mxcsr );
1126 mxcsr &= KMP_X86_MXCSR_MASK;
1127
1128 // There is no point looking at t_fp_control_saved here.
1129 // If it is TRUE, we still have to update the values if they are different from those we now have.
1130 // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
1131 // that the values in the team are the same as those we have.
1132 // So, this code achieves what we need whether or not t_fp_control_saved is true.
1133 // By checking whether the value needs updating we avoid unnecessary writes that would put the
1134 // cache-line into a written state, causing all threads in the team to have to read it again.
Jonathan Peyton6b560f02016-07-01 17:54:32 +00001135 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1136 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001137 // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
1138 // So we must ensure it is correct.
Jonathan Peyton6b560f02016-07-01 17:54:32 +00001139 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001140 }
1141 else {
1142 // Similarly here. Don't write to this cache-line in the team structure unless we have to.
Jonathan Peyton6b560f02016-07-01 17:54:32 +00001143 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001144 }
1145}
1146
1147// Do the opposite, setting the hardware registers to the updated values from the team.
1148inline static void
1149updateHWFPControl(kmp_team_t * team)
1150{
1151 if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
1152 //
 1153 // Only reset the fp control regs if they have been changed in the team
 1154 // during the parallel region that we are exiting.
1155 //
1156 kmp_int16 x87_fpu_control_word;
1157 kmp_uint32 mxcsr;
1158 __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
1159 __kmp_store_mxcsr( &mxcsr );
1160 mxcsr &= KMP_X86_MXCSR_MASK;
1161
1162 if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
1163 __kmp_clear_x87_fpu_status_word();
1164 __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
1165 }
1166
1167 if ( team->t.t_mxcsr != mxcsr ) {
1168 __kmp_load_mxcsr( &team->t.t_mxcsr );
1169 }
1170 }
1171}
1172#else
1173# define propagateFPControl(x) ((void)0)
1174# define updateHWFPControl(x) ((void)0)
1175#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
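/*
   Illustrative sketch (assumed user code, not part of the runtime): because
   propagateFPControl() saves the master's x87 control word and MXCSR into the team
   and updateHWFPControl() reloads them when they differ, a mode the master sets
   before the fork can be observed by the whole team when KMP_INHERIT_FP_CONTROL is
   enabled, e.g.

       _MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON );   // master changes MXCSR
       #pragma omp parallel
       {
           // workers run with flush-to-zero enabled as well
       }
*/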
1176
Jim Cownie5e8470a2013-09-27 10:38:44 +00001177static void
1178__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
1179
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001180/*
 1181 * Run a parallel region that has been serialized, so it runs only in a team consisting of the single master thread.
1182 */
1183void
1184__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
1185{
1186 kmp_info_t *this_thr;
1187 kmp_team_t *serial_team;
1188
1189 KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );
1190
1191 /* Skip all this code for autopar serialized loops since it results in
1192 unacceptable overhead */
1193 if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
1194 return;
1195
1196 if( ! TCR_4( __kmp_init_parallel ) )
1197 __kmp_parallel_initialize();
1198
1199 this_thr = __kmp_threads[ global_tid ];
1200 serial_team = this_thr->th.th_serial_team;
1201
1202 /* utilize the serialized team held by this thread */
1203 KMP_DEBUG_ASSERT( serial_team );
1204 KMP_MB();
1205
1206 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001207 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1208 KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001209 KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
1210 global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
1211 this_thr->th.th_task_team = NULL;
1212 }
1213
1214#if OMP_40_ENABLED
1215 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1216 if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1217 proc_bind = proc_bind_false;
1218 }
1219 else if ( proc_bind == proc_bind_default ) {
1220 //
1221 // No proc_bind clause was specified, so use the current value
1222 // of proc-bind-var for this parallel region.
1223 //
1224 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1225 }
1226 //
1227 // Reset for next parallel region
1228 //
1229 this_thr->th.th_set_proc_bind = proc_bind_default;
1230#endif /* OMP_40_ENABLED */
1231
1232 if( this_thr->th.th_team != serial_team ) {
1233 // Nested level will be an index in the nested nthreads array
1234 int level = this_thr->th.th_team->t.t_level;
1235
1236 if( serial_team->t.t_serialized ) {
1237 /* this serial team was already used
 1238 * TODO increase performance by making these locks more specific */
1239 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001240
1241 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1242
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001243#if OMPT_SUPPORT
1244 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1245#endif
1246
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001247 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001248#if OMPT_SUPPORT
1249 ompt_parallel_id,
1250#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001251#if OMP_40_ENABLED
1252 proc_bind,
1253#endif
1254 & this_thr->th.th_current_task->td_icvs,
1255 0 USE_NESTED_HOT_ARG(NULL) );
1256 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1257 KMP_ASSERT( new_team );
1258
1259 /* setup new serialized team and install it */
1260 new_team->t.t_threads[0] = this_thr;
1261 new_team->t.t_parent = this_thr->th.th_team;
1262 serial_team = new_team;
1263 this_thr->th.th_serial_team = serial_team;
1264
1265 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1266 global_tid, serial_team ) );
1267
1268
1269 /* TODO the above breaks the requirement that if we run out of
1270 * resources, then we can still guarantee that serialized teams
1271 * are ok, since we may need to allocate a new one */
1272 } else {
1273 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1274 global_tid, serial_team ) );
1275 }
1276
1277 /* we have to initialize this serial team */
1278 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1279 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1280 KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
1281 serial_team->t.t_ident = loc;
1282 serial_team->t.t_serialized = 1;
1283 serial_team->t.t_nproc = 1;
1284 serial_team->t.t_parent = this_thr->th.th_team;
1285 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
1286 this_thr->th.th_team = serial_team;
1287 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1288
1289 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n",
1290 global_tid, this_thr->th.th_current_task ) );
1291 KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
1292 this_thr->th.th_current_task->td_flags.executing = 0;
1293
1294 __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
1295
1296 /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
1297 each serialized task represented by team->t.t_serialized? */
1298 copy_icvs(
1299 & this_thr->th.th_current_task->td_icvs,
1300 & this_thr->th.th_current_task->td_parent->td_icvs );
1301
1302 // Thread value exists in the nested nthreads array for the next nested level
1303 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1304 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1305 }
1306
1307#if OMP_40_ENABLED
1308 if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
1309 this_thr->th.th_current_task->td_icvs.proc_bind
1310 = __kmp_nested_proc_bind.bind_types[ level + 1 ];
1311 }
1312#endif /* OMP_40_ENABLED */
1313
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001314#if USE_DEBUGGER
1315 serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
1316#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001317 this_thr->th.th_info.ds.ds_tid = 0;
1318
1319 /* set thread cache values */
1320 this_thr->th.th_team_nproc = 1;
1321 this_thr->th.th_team_master = this_thr;
1322 this_thr->th.th_team_serialized = 1;
1323
1324 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1325 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1326
1327 propagateFPControl (serial_team);
1328
1329 /* check if we need to allocate dispatch buffers stack */
1330 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1331 if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
1332 serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
1333 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1334 }
1335 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1336
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001337#if OMPT_SUPPORT
1338 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1339 __ompt_team_assign_id(serial_team, ompt_parallel_id);
1340#endif
1341
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001342 KMP_MB();
1343
1344 } else {
1345 /* this serialized team is already being used,
1346 * that's fine, just add another nested level */
1347 KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
1348 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1349 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1350 ++ serial_team->t.t_serialized;
1351 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1352
1353 // Nested level will be an index in the nested nthreads array
1354 int level = this_thr->th.th_team->t.t_level;
1355 // Thread value exists in the nested nthreads array for the next nested level
1356 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1357 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1358 }
1359 serial_team->t.t_level++;
1360 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
1361 global_tid, serial_team, serial_team->t.t_level ) );
1362
1363 /* allocate/push dispatch buffers stack */
1364 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1365 {
1366 dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
1367 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1368 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1369 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1370 }
1371 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1372
1373 KMP_MB();
1374 }
1375
1376 if ( __kmp_env_consistency_check )
1377 __kmp_push_parallel( global_tid, NULL );
1378
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001379}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001380
Jim Cownie5e8470a2013-09-27 10:38:44 +00001381/* most of the work for a fork */
1382/* return true if we really went parallel, false if serialized */
1383int
1384__kmp_fork_call(
1385 ident_t * loc,
1386 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001387 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001388 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001389#if OMPT_SUPPORT
1390 void *unwrapped_task,
1391#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001392 microtask_t microtask,
1393 launch_t invoker,
1394/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001395#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001396 va_list * ap
1397#else
1398 va_list ap
1399#endif
1400 )
1401{
1402 void **argv;
1403 int i;
1404 int master_tid;
1405 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001406 kmp_team_t *team;
1407 kmp_team_t *parent_team;
1408 kmp_info_t *master_th;
1409 kmp_root_t *root;
1410 int nthreads;
1411 int master_active;
1412 int master_set_numthreads;
1413 int level;
1414#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001415 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001416 int teams_level;
1417#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001418#if KMP_NESTED_HOT_TEAMS
1419 kmp_hot_team_ptr_t **p_hot_teams;
1420#endif
1421 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001422 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001423 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001424
1425 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001426 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
1427 /* Some systems prefer the stack for the root thread(s) to start with */
1428 /* some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001429 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001430 /* These 2 lines below are so this does not get optimized out */
1431 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1432 __kmp_stkpadding += (short)((kmp_int64)dummy);
1433 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001434
1435 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001436 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001437 if( ! TCR_4(__kmp_init_parallel) )
1438 __kmp_parallel_initialize();
1439
1440 /* setup current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001441 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1442 parent_team = master_th->th.th_team;
1443 master_tid = master_th->th.th_info.ds.ds_tid;
1444 master_this_cons = master_th->th.th_local.this_construct;
1445 root = master_th->th.th_root;
1446 master_active = root->r.r_active;
1447 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001448
1449#if OMPT_SUPPORT
1450 ompt_parallel_id_t ompt_parallel_id;
1451 ompt_task_id_t ompt_task_id;
1452 ompt_frame_t *ompt_frame;
1453 ompt_task_id_t my_task_id;
1454 ompt_parallel_id_t my_parallel_id;
1455
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001456 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001457 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1458 ompt_task_id = __ompt_get_task_id_internal(0);
1459 ompt_frame = __ompt_get_task_frame_internal(0);
1460 }
1461#endif
1462
Jim Cownie5e8470a2013-09-27 10:38:44 +00001463 // Nested level will be an index in the nested nthreads array
1464 level = parent_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001465 active_level = parent_team->t.t_active_level; // used to launch non-serial teams even if nesting is not allowed
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001466#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001467 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1468#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001469#if KMP_NESTED_HOT_TEAMS
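    // Lazily allocate this master's array of hot-team descriptors the first time
    // nested hot teams may be needed; entry 0 describes the root's hot team.
    // Hot teams are intended to keep worker threads around between parallel
    // regions so they can be reused without re-forking.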
1470 p_hot_teams = &master_th->th.th_hot_teams;
1471 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1472 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1473 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1474 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1475 (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
1476 }
1477#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001478
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001479#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001480 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001481 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1482 int team_size = master_set_numthreads;
1483
1484 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1485 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001486 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001487 }
1488#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001489
Jim Cownie5e8470a2013-09-27 10:38:44 +00001490 master_th->th.th_ident = loc;
1491
1492#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001493 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001494 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
1495 // AC: This is start of parallel that is nested inside teams construct.
1496 // The team is actual (hot), all workers are ready at the fork barrier.
1497 // No lock needed to initialize the team a bit, then free workers.
1498 parent_team->t.t_ident = loc;
Jonathan Peyton7cf08d42016-06-16 18:47:38 +00001499 __kmp_alloc_argv_entries( argc, parent_team, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001500 parent_team->t.t_argc = argc;
1501 argv = (void**)parent_team->t.t_argv;
1502 for( i=argc-1; i >= 0; --i )
1503/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001504#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001505 *argv++ = va_arg( *ap, void * );
1506#else
1507 *argv++ = va_arg( ap, void * );
1508#endif
1509 /* Increment our nested depth level, but do not increase the serialization */
1510 if ( parent_team == master_th->th.th_serial_team ) {
1511 // AC: we are in serialized parallel
1512 __kmpc_serialized_parallel(loc, gtid);
1513 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
1514 parent_team->t.t_serialized--; // AC: need this so that enquiry functions
1515 // work correctly; will be restored at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001516
1517#if OMPT_SUPPORT
1518 void *dummy;
1519 void **exit_runtime_p;
1520
1521 ompt_lw_taskteam_t lw_taskteam;
1522
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001523 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001524 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1525 unwrapped_task, ompt_parallel_id);
1526 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1527 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1528
1529 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1530
1531#if OMPT_TRACE
1532 /* OMPT implicit task begin */
1533 my_task_id = lw_taskteam.ompt_task_info.task_id;
1534 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001535 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001536 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1537 my_parallel_id, my_task_id);
1538 }
1539#endif
1540
1541 /* OMPT state */
1542 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1543 } else {
1544 exit_runtime_p = &dummy;
1545 }
1546#endif
1547
Jonathan Peyton45be4502015-08-11 21:36:41 +00001548 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001549 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1550 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001551 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001552#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001553 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001554#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001555 );
1556 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001557
1558#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001559 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001560 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001561#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001562 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001563
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001564 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001565 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1566 ompt_parallel_id, ompt_task_id);
1567 }
1568
1569 __ompt_lw_taskteam_unlink(master_th);
1570 // reset (clear) the task id, but only after unlinking the task
1571 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1572#endif
1573
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001574 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001575 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001576 ompt_parallel_id, ompt_task_id,
1577 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001578 }
1579 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1580 }
1581#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001582 return TRUE;
1583 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001584
Jim Cownie5e8470a2013-09-27 10:38:44 +00001585 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001586#if OMPT_SUPPORT
1587 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1588#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001589 parent_team->t.t_invoke = invoker;
1590 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1591 parent_team->t.t_active_level ++;
1592 parent_team->t.t_level ++;
1593
1594 /* Change number of threads in the team if requested */
1595 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001596 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001597 // AC: can only reduce the number of threads dynamically; cannot increase
1598 kmp_info_t **other_threads = parent_team->t.t_threads;
1599 parent_team->t.t_nproc = master_set_numthreads;
1600 for ( i = 0; i < master_set_numthreads; ++i ) {
1601 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1602 }
1603 // Keep extra threads hot in the team for possible next parallels
1604 }
1605 master_th->th.th_set_nproc = 0;
1606 }
1607
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001608#if USE_DEBUGGER
1609 if ( __kmp_debugging ) { // Let debugger override number of threads.
1610 int nth = __kmp_omp_num_threads( loc );
1611 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1612 master_set_numthreads = nth;
1613 }; // if
1614 }; // if
1615#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001616
1617 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1618 __kmp_internal_fork( loc, gtid, parent_team );
1619 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1620
1621 /* Invoke microtask for MASTER thread */
1622 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1623 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1624
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001625 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001626 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1627 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001628 if (! parent_team->t.t_invoke( gtid )) {
1629 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1630 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001631 }
1632 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1633 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1634 KMP_MB(); /* Flush all pending memory write invalidates. */
1635
1636 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1637
1638 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001639 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001640#endif /* OMP_40_ENABLED */
1641
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001642#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001644 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001646#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001647
Jim Cownie5e8470a2013-09-27 10:38:44 +00001648 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
1649 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001650 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001651#if OMP_40_ENABLED
1652 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1653#endif
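    // enter_teams is true either for the outer invocation of a teams construct
    // itself (ap == NULL at active_level 0) or for a parallel region that sits
    // directly at the teams nesting level; in both cases threads must still be
    // reserved even when nesting is otherwise disabled (see the check below).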
Jim Cownie5e8470a2013-09-27 10:38:44 +00001654 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001655 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001656
1657 // Check if we need to take forkjoin lock? (no need for serialized parallel out of teams construct).
1658 // This code moved here from __kmp_reserve_threads() to speedup nested serialized parallels.
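    // If nesting is disabled and we are already inside an active parallel region
    // (and not entering a teams construct), or the library mode is serial, the
    // region is serialized right here without taking the fork/join lock.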
1659 if (nthreads > 1) {
1660 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1661#if OMP_40_ENABLED
1662 && !enter_teams
1663#endif /* OMP_40_ENABLED */
1664 ) ) || ( __kmp_library == library_serial ) ) {
1665 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1666 gtid, nthreads ));
1667 nthreads = 1;
1668 }
1669 }
1670 if ( nthreads > 1 ) {
1671 /* determine how many new threads we can use */
1672 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1673
1674 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001675#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001676/* AC: If we execute teams from a parallel region (on the host), then the teams should be created,
1677 but each can have only 1 thread if nesting is disabled. If teams is called from a serial region,
1678 then the teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001679 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001680#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001681 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001682 if ( nthreads == 1 ) {
1683 // Free lock for single thread execution here;
1684 // for multi-thread execution it will be freed later
1685 // after team of threads created and initialized
1686 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1687 }
1688 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001689 }
1690 KMP_DEBUG_ASSERT( nthreads > 0 );
1691
1692 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001693 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001694
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 /* create a serialized parallel region? */
1696 if ( nthreads == 1 ) {
1697 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001698#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001699 void * args[ argc ];
1700#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001701 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001702#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001703
Jim Cownie5e8470a2013-09-27 10:38:44 +00001704 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1705
1706 __kmpc_serialized_parallel(loc, gtid);
1707
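    // For the Intel entry point the runtime invokes the microtask itself. Three
    // sub-cases follow: no varargs were passed, so the arguments are taken from
    // the parent team (teams construct); the outer parallel of a teams construct
    // (microtask == __kmp_teams_master); and an ordinary serialized parallel.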
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001708 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001710 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711#if OMP_40_ENABLED
1712 if ( !ap ) {
1713 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001714 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001715 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001716
1717#if OMPT_SUPPORT
1718 void *dummy;
1719 void **exit_runtime_p;
1720
1721 ompt_lw_taskteam_t lw_taskteam;
1722
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001723 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001724 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1725 unwrapped_task, ompt_parallel_id);
1726 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1727 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1728
1729 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1730
1731#if OMPT_TRACE
1732 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001733 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001734 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1735 ompt_parallel_id, my_task_id);
1736 }
1737#endif
1738
1739 /* OMPT state */
1740 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1741 } else {
1742 exit_runtime_p = &dummy;
1743 }
1744#endif
1745
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001746 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001747 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1748 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001749 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1750#if OMPT_SUPPORT
1751 , exit_runtime_p
1752#endif
1753 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001754 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001755
1756#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001757 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001758 if (ompt_enabled) {
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001759 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001760
1761#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001762 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001763 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1764 ompt_parallel_id, ompt_task_id);
1765 }
1766#endif
1767
1768 __ompt_lw_taskteam_unlink(master_th);
1769 // reset (clear) the task id, but only after unlinking the task
1770 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1771
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001772 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001773 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001774 ompt_parallel_id, ompt_task_id,
1775 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001776 }
1777 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1778 }
1779#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001780 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
1781 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1782 team = master_th->th.th_team;
1783 //team->t.t_pkfn = microtask;
1784 team->t.t_invoke = invoker;
1785 __kmp_alloc_argv_entries( argc, team, TRUE );
1786 team->t.t_argc = argc;
1787 argv = (void**) team->t.t_argv;
1788 if ( ap ) {
1789 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001790// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001791# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001792 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001793# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001794 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001795# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001796 } else {
1797 for( i=0; i < argc; ++i )
1798 // Get args from parent team for teams construct
1799 argv[i] = parent_team->t.t_argv[i];
1800 }
1801 // AC: revert change made in __kmpc_serialized_parallel()
1802 // because initial code in teams should have level=0
1803 team->t.t_level--;
1804 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001805 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001806 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1807 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001808 invoker(gtid);
1809 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810 } else {
1811#endif /* OMP_40_ENABLED */
1812 argv = args;
1813 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001814// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001815#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001816 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001817#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001818 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001819#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001820 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001821
1822#if OMPT_SUPPORT
1823 void *dummy;
1824 void **exit_runtime_p;
1825
1826 ompt_lw_taskteam_t lw_taskteam;
1827
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001828 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001829 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1830 unwrapped_task, ompt_parallel_id);
1831 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1832 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1833
1834 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1835
1836#if OMPT_TRACE
1837 /* OMPT implicit task begin */
1838 my_task_id = lw_taskteam.ompt_task_info.task_id;
1839 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001840 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001841 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1842 my_parallel_id, my_task_id);
1843 }
1844#endif
1845
1846 /* OMPT state */
1847 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1848 } else {
1849 exit_runtime_p = &dummy;
1850 }
1851#endif
1852
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001853 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001854 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1855 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001856 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1857#if OMPT_SUPPORT
1858 , exit_runtime_p
1859#endif
1860 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001861 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001862
1863#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001864 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001865 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001866#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001867 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001868
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001869 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001870 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1871 my_parallel_id, my_task_id);
1872 }
1873#endif
1874
1875 __ompt_lw_taskteam_unlink(master_th);
1876 // reset (clear) the task id, but only after unlinking the task
1877 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1878
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001879 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001880 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001881 ompt_parallel_id, ompt_task_id,
1882 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001883 }
1884 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1885 }
1886#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001887#if OMP_40_ENABLED
1888 }
1889#endif /* OMP_40_ENABLED */
1890 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001891 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001892#if OMPT_SUPPORT
1893 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1894 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1895 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1896 unwrapped_task, ompt_parallel_id);
1897
1898 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001899 lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001900 __ompt_lw_taskteam_link(lwt, master_th);
1901#endif
1902
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001903 // we were called from GNU native code
1904 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1905 return FALSE;
1906 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001908 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001909 }
1910
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001912 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001913 KMP_MB();
1914 return FALSE;
1915 }
1916
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917 // GEH: only modify the executing flag in the case when not serialized
1918 // serialized case is handled in kmpc_serialized_parallel
1919 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001920 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1921 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1923 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1924 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001925
1926#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001927 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001928#endif /* OMP_40_ENABLED */
1929 {
1930 /* Increment our nested depth level */
1931 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1932 }
1933
Jim Cownie5e8470a2013-09-27 10:38:44 +00001934 // See if we need to make a copy of the ICVs.
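    // A private ICV copy is materialized below only if the nested nthreads table
    // or a nested proc-bind policy overrides the parent's values; otherwise the
    // master's current ICVs are handed to __kmp_allocate_team unchanged.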
Jim Cownie5e8470a2013-09-27 10:38:44 +00001935 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001936 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1937 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 }
1939 else {
1940 nthreads_icv = 0; // don't update
1941 }
1942
1943#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001944 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001945 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001946 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001947 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1948 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949 }
1950 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001951 if (proc_bind == proc_bind_default) {
1952 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1953 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001954 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001955 /* else: The proc_bind policy was specified explicitly on parallel clause. This
1956 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001957 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001958 if ((level+1 < __kmp_nested_proc_bind.used)
1959 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1960 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001961 }
1962 }
1963
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965 master_th->th.th_set_proc_bind = proc_bind_default;
1966#endif /* OMP_40_ENABLED */
1967
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001968 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001972 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001973 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001974 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001975 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001976 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001977 new_icvs.nproc = nthreads_icv;
1978 }
1979
1980#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001981 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982 new_icvs.proc_bind = proc_bind_icv;
1983 }
1984#endif /* OMP_40_ENABLED */
1985
1986 /* allocate a new parallel team */
1987 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1988 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001989#if OMPT_SUPPORT
1990 ompt_parallel_id,
1991#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001993 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001995 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1996 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997 /* allocate a new parallel team */
1998 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1999 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002000#if OMPT_SUPPORT
2001 ompt_parallel_id,
2002#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002003#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002004 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002006 &master_th->th.th_current_task->td_icvs, argc
2007 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002009 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010
2011 /* setup the new team */
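    // KMP_CHECK_UPDATE performs the store only when the value actually differs
    // (roughly: if (dst != src) dst = src;), so unchanged team fields do not
    // dirty cache lines that other threads may be reading.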
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002012 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2013 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2014 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2015 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2016 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002017#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002018 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002019#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002020 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021 // TODO: parent_team->t.t_level == INT_MAX ???
2022#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002023 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002024#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002025 int new_level = parent_team->t.t_level + 1;
2026 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2027 new_level = parent_team->t.t_active_level + 1;
2028 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002029#if OMP_40_ENABLED
2030 } else {
2031 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002032 int new_level = parent_team->t.t_level;
2033 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2034 new_level = parent_team->t.t_active_level;
2035 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036 }
2037#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002038 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton6b560f02016-07-01 17:54:32 +00002039 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002040 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002041
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002042#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002043 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002044#endif
2045
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002046 // Update the floating point rounding in the team if required.
2047 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002048
2049 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002050 // Set master's task team to team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002051#if 0
2052 // Patch out an assertion that trips while the runtime seems to operate correctly.
2053 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002054 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002055#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002057 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002058 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002059
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002060 if ( active_level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002061 // Take a memo of master's task_state
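        // Push the master's current task_state onto its per-thread memo stack so
        // it can be restored at join time; if the nesting depth exceeds the
        // stack's capacity, the stack is doubled in size below.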
2062 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2063 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002064 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2065 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002066 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002067 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002068 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2069 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2070 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002071 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2072 new_stack[i] = 0;
2073 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002074 old_stack = master_th->th.th_task_state_memo_stack;
2075 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002076 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002077 __kmp_free(old_stack);
2078 }
2079 // Store master's task_state on stack
2080 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2081 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002082#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002083 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
Jonathan Peyton54127982015-11-04 21:37:48 +00002084 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2085 }
2086 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002087#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002088 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002089#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002090 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002091#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002092 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002093#if !KMP_NESTED_HOT_TEAMS
2094 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2095#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002096 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002097
2098 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2099 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2100 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2101 ( team->t.t_master_tid == 0 &&
2102 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2103 KMP_MB();
2104
2105 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002106 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002107#if OMP_40_ENABLED
2108 if ( ap ) {
2109#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002110 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002111// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002112#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002113 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002115 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002116#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002117 KMP_CHECK_UPDATE(*argv, new_argv);
2118 argv++;
2119 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120#if OMP_40_ENABLED
2121 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002122 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002123 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002124 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2125 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002126 }
2127#endif /* OMP_40_ENABLED */
2128
2129 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002130 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002131 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2132 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002133
2134 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002135 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002136
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002137#if OMPT_SUPPORT
2138 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2139#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002140
2141 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2142
Jim Cownie5e8470a2013-09-27 10:38:44 +00002143#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002144 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002145# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002146 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002147# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002148 ) {
2149#if USE_ITT_NOTIFY
2150 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2151 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002152 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002153 kmp_uint64 tmp_time = 0;
2154 if ( __itt_get_timestamp_ptr )
2155 tmp_time = __itt_get_timestamp();
2156 // Internal fork - report frame begin
2157 master_th->th.th_frame_time = tmp_time;
2158 if ( __kmp_forkjoin_frames_mode == 3 )
2159 team->t.t_region_time = tmp_time;
2160 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2161#endif /* USE_ITT_NOTIFY */
2162 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2163 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2164 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002165 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2166 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002167 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002168#endif /* USE_ITT_BUILD */
2169
2170 /* now go on and do the work */
2171 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2172 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002173 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2174 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002175
2176#if USE_ITT_BUILD
2177 if ( __itt_stack_caller_create_ptr ) {
2178 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2179 }
2180#endif /* USE_ITT_BUILD */
2181
2182#if OMP_40_ENABLED
2183 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
2184#endif /* OMP_40_ENABLED */
2185 {
2186 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002187 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2188 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002189 }
2190
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002191 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002192 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2193 return TRUE;
2194 }
2195
2196 /* Invoke microtask for MASTER thread */
2197 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2198 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002199 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002200
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002201 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002202 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2203 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002204 if (! team->t.t_invoke( gtid )) {
2205 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2206 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002207 }
2208 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2209 gtid, team->t.t_id, team->t.t_pkfn ) );
2210 KMP_MB(); /* Flush all pending memory write invalidates. */
2211
2212 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2213
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002214#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002215 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002216 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2217 }
2218#endif
2219
Jim Cownie5e8470a2013-09-27 10:38:44 +00002220 return TRUE;
2221}
2222
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002223#if OMPT_SUPPORT
2224static inline void
2225__kmp_join_restore_state(
2226 kmp_info_t *thread,
2227 kmp_team_t *team)
2228{
2229 // restore state outside the region
2230 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2231 ompt_state_work_serial : ompt_state_work_parallel);
2232}
2233
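// Helper for the join path: fires the OMPT parallel_end callback for the given
// parallel id, clears the task frame's reenter pointer, and restores the
// thread's OMPT state to match the team being returned to.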
2234static inline void
2235__kmp_join_ompt(
2236 kmp_info_t *thread,
2237 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002238 ompt_parallel_id_t parallel_id,
2239 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002240{
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002241 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002242 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002243 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002244 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002245 }
2246
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002247 task_info->frame.reenter_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002248 __kmp_join_restore_state(thread,team);
2249}
2250#endif
2251
Jim Cownie5e8470a2013-09-27 10:38:44 +00002252void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002253__kmp_join_call(ident_t *loc, int gtid
2254#if OMPT_SUPPORT
2255 , enum fork_context_e fork_context
2256#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257#if OMP_40_ENABLED
2258 , int exit_teams
2259#endif /* OMP_40_ENABLED */
2260)
2261{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00002262 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002263 kmp_team_t *team;
2264 kmp_team_t *parent_team;
2265 kmp_info_t *master_th;
2266 kmp_root_t *root;
2267 int master_active;
2268 int i;
2269
2270 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2271
2272 /* setup current data */
2273 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002274 root = master_th->th.th_root;
2275 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002276 parent_team = team->t.t_parent;
2277
2278 master_th->th.th_ident = loc;
2279
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002280#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002281 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002282 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2283 }
2284#endif
2285
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002286#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00002287 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2288 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2289 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002290 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2291 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002293#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002294
2295 if( team->t.t_serialized ) {
2296#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002297 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002298 // We are in teams construct
2299 int level = team->t.t_level;
2300 int tlevel = master_th->th.th_teams_level;
2301 if ( level == tlevel ) {
2302 // AC: we haven't incremented it earlier at start of teams construct,
2303 // so do it here - at the end of teams construct
2304 team->t.t_level++;
2305 } else if ( level == tlevel + 1 ) {
2306 // AC: we are exiting parallel inside teams, need to increment serialization
2307 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2308 team->t.t_serialized++;
2309 }
2310 }
2311#endif /* OMP_40_ENABLED */
2312 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002313
2314#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002315 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002316 __kmp_join_restore_state(master_th, parent_team);
2317 }
2318#endif
2319
Jim Cownie5e8470a2013-09-27 10:38:44 +00002320 return;
2321 }
2322
2323 master_active = team->t.t_master_active;
2324
2325#if OMP_40_ENABLED
2326 if (!exit_teams)
2327#endif /* OMP_40_ENABLED */
2328 {
2329 // AC: No barrier for internal teams at exit from teams construct.
2330 // But there is barrier for external team (league).
2331 __kmp_internal_join( loc, gtid, team );
2332 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002333#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002334 else {
2335 master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
2336 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002337#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002338
Jim Cownie5e8470a2013-09-27 10:38:44 +00002339 KMP_MB();
2340
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002341#if OMPT_SUPPORT
2342 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2343#endif
2344
Jim Cownie5e8470a2013-09-27 10:38:44 +00002345#if USE_ITT_BUILD
2346 if ( __itt_stack_caller_create_ptr ) {
2347 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2348 }
2349
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002350 // Mark end of "parallel" region for VTune.
2351 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002353 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002355 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002356 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002357 // only one notification scheme (either "submit" or "forking/joined", not both)
2358 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2359 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2360 0, loc, master_th->th.th_team_nproc, 1 );
2361 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2362 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2363 __kmp_itt_region_joined( gtid );
2364 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002365#endif /* USE_ITT_BUILD */
2366
2367#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002368 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002369 !exit_teams &&
2370 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2371 team->t.t_level == master_th->th.th_teams_level + 1 ) {
2372 // AC: We need to leave the team structure intact at the end
2373 // of parallel inside the teams construct, so that at the next
2374 // parallel same (hot) team works, only adjust nesting levels
2375
2376 /* Decrement our nested depth level */
2377 team->t.t_level --;
2378 team->t.t_active_level --;
2379 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2380
2381 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002382 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002383 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002384 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002385 kmp_info_t **other_threads = team->t.t_threads;
2386 team->t.t_nproc = new_num;
2387 for ( i = 0; i < old_num; ++i ) {
2388 other_threads[i]->th.th_team_nproc = new_num;
2389 }
2390 // Adjust states of non-used threads of the team
2391 for ( i = old_num; i < new_num; ++i ) {
2392 // Re-initialize thread's barrier data.
2393 int b;
2394 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002395 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002396 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002397 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002398#if USE_DEBUGGER
2399 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2400#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002401 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002402 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2403 // Synchronize thread's task state
2404 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2405 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002406 }
2407 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002408
2409#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002410 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002411 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002412 }
2413#endif
2414
Jim Cownie5e8470a2013-09-27 10:38:44 +00002415 return;
2416 }
2417#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002418
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002419 /* do cleanup and restore the parent team */
2420 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2421 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2422
2423 master_th->th.th_dispatch =
2424 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002425
2426 /* jc: The following lock has instructions with REL and ACQ semantics,
2427 separating the parallel user code called in this parallel region
2428 from the serial user code called after this function returns.
2429 */
2430 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2431
2432#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002433 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002434#endif /* OMP_40_ENABLED */
2435 {
2436 /* Decrement our nested depth level */
2437 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2438 }
2439 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2440
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002441#if OMPT_SUPPORT && OMPT_TRACE
2442 if(ompt_enabled){
2443 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2444 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2445 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2446 parallel_id, task_info->task_id);
2447 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00002448 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002449 task_info->task_id = 0;
2450 }
2451#endif
2452
Jim Cownie5e8470a2013-09-27 10:38:44 +00002453 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2454 0, master_th, team ) );
2455 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002456
Alp Toker98758b02014-03-02 04:12:06 +00002457#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002458 //
2459 // Restore master thread's partition.
2460 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002461 master_th->th.th_first_place = team->t.t_first_place;
2462 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002463#endif /* OMP_40_ENABLED */
2464
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002465 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002466
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002467 if ( root->r.r_active != master_active )
2468 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002469
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002470 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002471
2472 /* This race was fun to find. Make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002473 * region; otherwise assertions may fail occasionally since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002474 * may be reallocated and the hierarchy appears inconsistent. It is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002475 * actually safe to run and won't cause any bugs, but it will trigger those
Jim Cownie5e8470a2013-09-27 10:38:44 +00002476 * assertion failures. It's only one deref&assign, so we might as well put this
2477 * in the critical region. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002478 master_th->th.th_team = parent_team;
2479 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2480 master_th->th.th_team_master = parent_team->t.t_threads[0];
2481 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002482
2483 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002484 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002485 parent_team != master_th->th.th_serial_team &&
2486 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002487 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2488 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002489 }
2490
Jim Cownie5e8470a2013-09-27 10:38:44 +00002491 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002492 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2493 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2494 // Remember master's state if we re-use this nested hot team
2495 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002496 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002497 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002498 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002499 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002500 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002501 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002502 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002503 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002504 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002505
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002506 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2507 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2508 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002509
2510 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2511
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002512#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002513 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002514 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002515 }
2516#endif
2517
Jim Cownie5e8470a2013-09-27 10:38:44 +00002518 KMP_MB();
2519 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2520}
2521
2522/* ------------------------------------------------------------------------ */
2523/* ------------------------------------------------------------------------ */
2524
2525/* Check whether we should push an internal control record onto the
2526 serial team stack. If so, do it. */
2527void
2528__kmp_save_internal_controls ( kmp_info_t * thread )
2529{
2530
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002531 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002532 return;
2533 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002534 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002535 int push = 0;
2536
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002537 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538 push = 1;
2539 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002540 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2541 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002542 push = 1;
2543 }
2544 }
2545 if (push) { /* push a record on the serial team's stack */
2546 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2547
Jim Cownie5e8470a2013-09-27 10:38:44 +00002548 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002549
2550 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2551
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002552 control->next = thread->th.th_team->t.t_control_stack_top;
2553 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002554 }
2555 }
2556}
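/* Editor's sketch (illustrative, not part of the original source): the stack
 * maintained above lets ICV changes made while executing inside nested serialized
 * parallel regions be rolled back when those regions end. A hypothetical trigger:
 *
 *     #pragma omp parallel if(0)            // outer serialized region
 *     #pragma omp parallel if(0)            // nested serialized region
 *     { omp_set_num_threads(8); }           // the setter calls
 *                                           // __kmp_save_internal_controls() first
 *
 * As coded above, a record is pushed only when the thread is running on its serial
 * team with t_serialized > 1 and no record exists yet for the current serial
 * nesting level. */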
2557
2558/* Changes set_nproc */
2559void
2560__kmp_set_num_threads( int new_nth, int gtid )
2561{
2562 kmp_info_t *thread;
2563 kmp_root_t *root;
2564
2565 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2566 KMP_DEBUG_ASSERT( __kmp_init_serial );
2567
2568 if (new_nth < 1)
2569 new_nth = 1;
2570 else if (new_nth > __kmp_max_nth)
2571 new_nth = __kmp_max_nth;
2572
Jonathan Peyton45be4502015-08-11 21:36:41 +00002573 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002574 thread = __kmp_threads[gtid];
2575
2576 __kmp_save_internal_controls( thread );
2577
2578 set__nproc( thread, new_nth );
2579
2580 //
2581 // If this omp_set_num_threads() call will cause the hot team size to be
2582 // reduced (in the absence of a num_threads clause), then reduce it now,
2583 // rather than waiting for the next parallel region.
2584 //
2585 root = thread->th.th_root;
2586 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002587 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2588#if KMP_NESTED_HOT_TEAMS
2589 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2590#endif
2591 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592 kmp_team_t *hot_team = root->r.r_hot_team;
2593 int f;
2594
2595 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2596
Jim Cownie5e8470a2013-09-27 10:38:44 +00002597 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002598 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2599 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002600 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2601 // When decreasing team size, threads no longer in the team should unref task team.
2602 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2603 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002604 __kmp_free_thread( hot_team->t.t_threads[f] );
2605 hot_team->t.t_threads[f] = NULL;
2606 }
2607 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002608#if KMP_NESTED_HOT_TEAMS
2609 if( thread->th.th_hot_teams ) {
2610 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2611 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2612 }
2613#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002614
Jim Cownie5e8470a2013-09-27 10:38:44 +00002615 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2616
2617 //
2618 // Update the t_nproc field in the threads that are still active.
2619 //
2620 for( f=0 ; f < new_nth; f++ ) {
2621 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2622 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2623 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002624 // Special flag (-1) records that the hot team size was changed by an omp_set_num_threads() call
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002625 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002626 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002627}
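/* Editor's note (hedged usage sketch): the trimming above means surplus workers are
 * released at the set call rather than at the next fork. For example:
 *
 *     omp_set_num_threads(16);
 *     #pragma omp parallel                  // hot team grows to 16 threads
 *     { ... }
 *     omp_set_num_threads(4);               // workers 4..15 are freed here,
 *                                           // provided the root is not active and
 *                                           // nested hot teams are not in use
 *     #pragma omp parallel                  // reuses the already-trimmed hot team
 *     { ... }
 */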
2628
Jim Cownie5e8470a2013-09-27 10:38:44 +00002629/* Changes max_active_levels */
2630void
2631__kmp_set_max_active_levels( int gtid, int max_active_levels )
2632{
2633 kmp_info_t *thread;
2634
2635 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2636 KMP_DEBUG_ASSERT( __kmp_init_serial );
2637
2638 // validate max_active_levels
2639 if( max_active_levels < 0 ) {
2640 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2641 // We ignore this call if the user has specified a negative value.
2642 // The current setting won't be changed. The last valid setting will be used.
2643 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2644 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2645 return;
2646 }
2647 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2648 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2649 // We allow a zero value. (implementation defined behavior)
2650 } else {
2651 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2652 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2653 // Current upper limit is MAX_INT. (implementation defined behavior)
2654 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
2655 // Actually, the flow should never get here as long as the upper limit stays at MAX_INT.
2656 }
2657 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2658
2659 thread = __kmp_threads[ gtid ];
2660
2661 __kmp_save_internal_controls( thread );
2662
2663 set__max_active_levels( thread, max_active_levels );
2664
2665}
2666
2667/* Gets max_active_levels */
2668int
2669__kmp_get_max_active_levels( int gtid )
2670{
2671 kmp_info_t *thread;
2672
2673 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2674 KMP_DEBUG_ASSERT( __kmp_init_serial );
2675
2676 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002677 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002678 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002679 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2680 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002681}
2682
2683/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2684void
2685__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2686{
2687 kmp_info_t *thread;
2688// kmp_team_t *team;
2689
2690 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2691 KMP_DEBUG_ASSERT( __kmp_init_serial );
2692
2693 // Check if the kind parameter is valid, correct if needed.
2694 // Valid parameters should fit in one of two intervals - standard or extended:
2695 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2696 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2697 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2698 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2699 {
2700 // TODO: Hint needs attention in case we change the default schedule.
2701 __kmp_msg(
2702 kmp_ms_warning,
2703 KMP_MSG( ScheduleKindOutOfRange, kind ),
2704 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2705 __kmp_msg_null
2706 );
2707 kind = kmp_sched_default;
2708 chunk = 0; // ignore chunk value in case of bad kind
2709 }
2710
2711 thread = __kmp_threads[ gtid ];
2712
2713 __kmp_save_internal_controls( thread );
2714
2715 if ( kind < kmp_sched_upper_std ) {
2716 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2717 // distinguish static chunked vs. unchunked:
2718 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002719 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002721 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002722 }
2723 } else {
2724 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002725 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002726 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2727 }
2728 if ( kind == kmp_sched_auto ) {
2729 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002730 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002732 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002733 }
2734}
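/* Editor's note (illustrative walk-through of the mapping above): a call such as
 *
 *     __kmp_set_schedule(gtid, kmp_sched_dynamic, 0);
 *
 * takes the "standard" branch, looks up the internal schedule in
 * __kmp_sch_map[kmp_sched_dynamic - kmp_sched_lower - 1], and stores the chunk as
 * given. Only two cases are special-cased: kmp_sched_static with an invalid chunk
 * maps to plain (unchunked) kmp_sch_static, and kmp_sched_auto forces the chunk to
 * KMP_DEFAULT_CHUNK. */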
2735
2736/* Gets def_sched_var ICV values */
2737void
2738__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2739{
2740 kmp_info_t *thread;
2741 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002742
2743 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2744 KMP_DEBUG_ASSERT( __kmp_init_serial );
2745
2746 thread = __kmp_threads[ gtid ];
2747
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002748 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002749
2750 switch ( th_type ) {
2751 case kmp_sch_static:
2752 case kmp_sch_static_greedy:
2753 case kmp_sch_static_balanced:
2754 *kind = kmp_sched_static;
2755 *chunk = 0; // chunk was not set, try to show this fact via zero value
2756 return;
2757 case kmp_sch_static_chunked:
2758 *kind = kmp_sched_static;
2759 break;
2760 case kmp_sch_dynamic_chunked:
2761 *kind = kmp_sched_dynamic;
2762 break;
2763 case kmp_sch_guided_chunked:
2764 case kmp_sch_guided_iterative_chunked:
2765 case kmp_sch_guided_analytical_chunked:
2766 *kind = kmp_sched_guided;
2767 break;
2768 case kmp_sch_auto:
2769 *kind = kmp_sched_auto;
2770 break;
2771 case kmp_sch_trapezoidal:
2772 *kind = kmp_sched_trapezoidal;
2773 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002774#if KMP_STATIC_STEAL_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002775 case kmp_sch_static_steal:
2776 *kind = kmp_sched_static_steal;
2777 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002778#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779 default:
2780 KMP_FATAL( UnknownSchedulingType, th_type );
2781 }
2782
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002783 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002784}
2785
2786int
2787__kmp_get_ancestor_thread_num( int gtid, int level ) {
2788
2789 int ii, dd;
2790 kmp_team_t *team;
2791 kmp_info_t *thr;
2792
2793 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2794 KMP_DEBUG_ASSERT( __kmp_init_serial );
2795
2796 // validate level
2797 if( level == 0 ) return 0;
2798 if( level < 0 ) return -1;
2799 thr = __kmp_threads[ gtid ];
2800 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002801 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002802 if( level > ii ) return -1;
2803
2804#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002805 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002806 // AC: we are in teams region where multiple nested teams have same level
2807 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2808 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2809 KMP_DEBUG_ASSERT( ii >= tlevel );
2810 // AC: As we need to pass by the teams league, we need to artificially increase ii
2811 if ( ii == tlevel ) {
2812 ii += 2; // three teams have same level
2813 } else {
2814 ii ++; // two teams have same level
2815 }
2816 }
2817 }
2818#endif
2819
2820 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2821
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002822 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002823 level++;
2824 while( ii > level )
2825 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002826 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002827 {
2828 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002829 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002830 team = team->t.t_parent;
2831 continue;
2832 }
2833 if( ii > level ) {
2834 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002835 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002836 ii--;
2837 }
2838 }
2839
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002840 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002841}
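/* Editor's worked example (hypothetical nesting, for illustration only): with
 * nested parallelism enabled and
 *
 *     #pragma omp parallel num_threads(2)   // level 1
 *     #pragma omp parallel num_threads(3)   // level 2
 *     ... omp_get_ancestor_thread_num(1) ...
 *
 * the caller's team has t_level == 2 and the requested level is one above the
 * caller, so after the level++ adjustment the while loop is skipped and the
 * function returns the caller's team's t_master_tid -- the caller's master's
 * position within the level-1 team. level == 0 always returns 0, and a negative
 * or too-deep level returns -1, per the early checks above. */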
2842
2843int
2844__kmp_get_team_size( int gtid, int level ) {
2845
2846 int ii, dd;
2847 kmp_team_t *team;
2848 kmp_info_t *thr;
2849
2850 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2851 KMP_DEBUG_ASSERT( __kmp_init_serial );
2852
2853 // validate level
2854 if( level == 0 ) return 1;
2855 if( level < 0 ) return -1;
2856 thr = __kmp_threads[ gtid ];
2857 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002858 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002859 if( level > ii ) return -1;
2860
2861#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002862 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002863 // AC: we are in teams region where multiple nested teams have same level
2864 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2865 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2866 KMP_DEBUG_ASSERT( ii >= tlevel );
2867 // AC: As we need to pass by the teams league, we need to artificially increase ii
2868 if ( ii == tlevel ) {
2869 ii += 2; // three teams have same level
2870 } else {
2871 ii ++; // two teams have same level
2872 }
2873 }
2874 }
2875#endif
2876
2877 while( ii > level )
2878 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002879 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002880 {
2881 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002882 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002883 team = team->t.t_parent;
2884 continue;
2885 }
2886 if( ii > level ) {
2887 team = team->t.t_parent;
2888 ii--;
2889 }
2890 }
2891
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002892 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002893}
2894
Jim Cownie5e8470a2013-09-27 10:38:44 +00002895kmp_r_sched_t
2896__kmp_get_schedule_global() {
2897// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
2898// may be changed by kmp_set_defaults independently. So one can get the updated schedule here.
2899
2900 kmp_r_sched_t r_sched;
2901
2902 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2903 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2904 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2905 if ( __kmp_sched == kmp_sch_static ) {
2906 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2907 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2908 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2909 } else {
2910 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2911 }
2912
2913 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2914 r_sched.chunk = KMP_DEFAULT_CHUNK;
2915 } else {
2916 r_sched.chunk = __kmp_chunk;
2917 }
2918
2919 return r_sched;
2920}
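/* Editor's note (sketch of the resulting mapping, hedged): with the run-time
 * schedule left at "static", __kmp_sched stays kmp_sch_static and the pair returned
 * is { __kmp_static, KMP_DEFAULT_CHUNK }, i.e. the detailed static flavor (balanced
 * or greedy) plus the default chunk. An OMP_SCHEDULE of "guided,4" would presumably
 * yield { __kmp_guided, 4 } instead, since a chunk not below KMP_DEFAULT_CHUNK is
 * passed through unchanged. */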
2921
2922/* ------------------------------------------------------------------------ */
2923/* ------------------------------------------------------------------------ */
2924
2925
2926/*
2927 * Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2928 * at least argc number of *t_argv entries for the requested team.
2929 */
2930static void
2931__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2932{
2933
2934 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002935 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002936
2937 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2938 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002939 /* if previously allocated heap space for args, free them */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002940 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2941 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002942
2943 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2944 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002945 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002946 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2947 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002948 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002949 if ( __kmp_storage_map ) {
2950 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2951 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2952 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2953 "team_%d.t_inline_argv",
2954 team->t.t_id );
2955 }
2956 } else {
2957 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002958 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002959 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2960 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2961 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002962 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002963 if ( __kmp_storage_map ) {
2964 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2965 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2966 team->t.t_id );
2967 }
2968 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002969 }
2970}
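/* Editor's note (arithmetic restated for clarity, not authoritative): the policy
 * above keeps small argument lists inline and over-allocates heap lists to amortize
 * regrowth:
 *
 *     argc <= KMP_INLINE_ARGV_ENTRIES  ->  t_argv = t_inline_argv (no heap)
 *     argc  > KMP_INLINE_ARGV_ENTRIES  ->  t_max_argc =
 *                 max(KMP_MIN_MALLOC_ARGV_ENTRIES, 2 * argc)
 *
 * so a team that forks with a large argc will not reallocate again until the
 * argument count more than doubles. */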
2971
2972static void
2973__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2974{
2975 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002976 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002977 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2978 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002979 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002980 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002981 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002982 team->t.t_max_nproc = max_nth;
2983
2984 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002985 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002986 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002987#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00002988 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2989#endif
2990 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002991}
2992
2993static void
2994__kmp_free_team_arrays(kmp_team_t *team) {
2995 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2996 int i;
2997 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2998 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2999 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
3000 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
3001 }; // if
3002 }; // for
3003 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00003004 __kmp_free(team->t.t_disp_buffer);
3005 __kmp_free(team->t.t_dispatch);
3006 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003007 team->t.t_threads = NULL;
3008 team->t.t_disp_buffer = NULL;
3009 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003010 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003011}
3012
3013static void
3014__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3015 kmp_info_t **oldThreads = team->t.t_threads;
3016
Jonathan Peytona58563d2016-03-29 20:05:27 +00003017 __kmp_free(team->t.t_disp_buffer);
3018 __kmp_free(team->t.t_dispatch);
3019 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003020 __kmp_allocate_team_arrays(team, max_nth);
3021
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003022 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003023
3024 __kmp_free(oldThreads);
3025}
3026
3027static kmp_internal_control_t
3028__kmp_get_global_icvs( void ) {
3029
Jim Cownie5e8470a2013-09-27 10:38:44 +00003030 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003031
3032#if OMP_40_ENABLED
3033 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3034#endif /* OMP_40_ENABLED */
3035
3036 kmp_internal_control_t g_icvs = {
3037 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003038 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3039 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3040 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003041 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003042#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00003043 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003044#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003045 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3046 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003047 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3048 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003049#if OMP_40_ENABLED
3050 __kmp_nested_proc_bind.bind_types[0],
George Rokos28f31b42016-09-09 17:55:26 +00003051 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003052#endif /* OMP_40_ENABLED */
3053 NULL //struct kmp_internal_control *next;
3054 };
3055
3056 return g_icvs;
3057}
3058
3059static kmp_internal_control_t
3060__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3061
Jim Cownie5e8470a2013-09-27 10:38:44 +00003062 kmp_internal_control_t gx_icvs;
3063 gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
3064 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3065 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003066
3067 return gx_icvs;
3068}
3069
3070static void
3071__kmp_initialize_root( kmp_root_t *root )
3072{
3073 int f;
3074 kmp_team_t *root_team;
3075 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003076 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003077 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3078 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003079 KMP_DEBUG_ASSERT( root );
3080 KMP_ASSERT( ! root->r.r_begin );
3081
3082 /* setup the root state structure */
3083 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003084 root->r.r_begin = FALSE;
3085 root->r.r_active = FALSE;
3086 root->r.r_in_parallel = 0;
3087 root->r.r_blocktime = __kmp_dflt_blocktime;
3088 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003089
3090 /* setup the root team for this task */
3091 /* allocate the root team structure */
3092 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003093
Jim Cownie5e8470a2013-09-27 10:38:44 +00003094 root_team =
3095 __kmp_allocate_team(
3096 root,
3097 1, // new_nproc
3098 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003099#if OMPT_SUPPORT
3100 0, // root parallel id
3101#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003102#if OMP_40_ENABLED
3103 __kmp_nested_proc_bind.bind_types[0],
3104#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003105 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003106 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003107 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003108 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003109#if USE_DEBUGGER
3110 // Non-NULL value should be assigned to make the debugger display the root team.
3111 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3112#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003113
3114 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3115
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003116 root->r.r_root_team = root_team;
3117 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003118
3119 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003120 root_team->t.t_threads[0] = NULL;
3121 root_team->t.t_nproc = 1;
3122 root_team->t.t_serialized = 1;
3123 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3124 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3125 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003126 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3127 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3128
3129 /* setup the hot team for this task */
3130 /* allocate the hot team structure */
3131 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003132
Jim Cownie5e8470a2013-09-27 10:38:44 +00003133 hot_team =
3134 __kmp_allocate_team(
3135 root,
3136 1, // new_nproc
3137 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003138#if OMPT_SUPPORT
3139 0, // root parallel id
3140#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003141#if OMP_40_ENABLED
3142 __kmp_nested_proc_bind.bind_types[0],
3143#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003144 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003145 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003146 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003147 );
3148 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3149
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003150 root->r.r_hot_team = hot_team;
3151 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003152
3153 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003154 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003155
3156 /* initialize hot team */
3157 hot_team_max_nth = hot_team->t.t_max_nproc;
3158 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003159 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003160 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003161 hot_team->t.t_nproc = 1;
3162 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3163 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3164 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003165 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003166}
3167
3168#ifdef KMP_DEBUG
3169
3170
3171typedef struct kmp_team_list_item {
3172 kmp_team_p const * entry;
3173 struct kmp_team_list_item * next;
3174} kmp_team_list_item_t;
3175typedef kmp_team_list_item_t * kmp_team_list_t;
3176
3177
3178static void
3179__kmp_print_structure_team_accum( // Add team to list of teams.
3180 kmp_team_list_t list, // List of teams.
3181 kmp_team_p const * team // Team to add.
3182) {
3183
3184 // List must terminate with item where both entry and next are NULL.
3185 // Team is added to the list only once.
3186 // List is sorted in ascending order by team id.
3187 // Team id is *not* a key.
3188
3189 kmp_team_list_t l;
3190
3191 KMP_DEBUG_ASSERT( list != NULL );
3192 if ( team == NULL ) {
3193 return;
3194 }; // if
3195
3196 __kmp_print_structure_team_accum( list, team->t.t_parent );
3197 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3198
3199 // Search list for the team.
3200 l = list;
3201 while ( l->next != NULL && l->entry != team ) {
3202 l = l->next;
3203 }; // while
3204 if ( l->next != NULL ) {
3205 return; // Team has been added before, exit.
3206 }; // if
3207
3208 // Team is not found. Search list again for insertion point.
3209 l = list;
3210 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3211 l = l->next;
3212 }; // while
3213
3214 // Insert team.
3215 {
3216 kmp_team_list_item_t * item =
3217 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3218 * item = * l;
3219 l->entry = team;
3220 l->next = item;
3221 }
3222
3223}
3224
3225static void
3226__kmp_print_structure_team(
3227 char const * title,
3228 kmp_team_p const * team
3229
3230) {
3231 __kmp_printf( "%s", title );
3232 if ( team != NULL ) {
3233 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3234 } else {
3235 __kmp_printf( " - (nil)\n" );
3236 }; // if
3237}
3238
3239static void
3240__kmp_print_structure_thread(
3241 char const * title,
3242 kmp_info_p const * thread
3243
3244) {
3245 __kmp_printf( "%s", title );
3246 if ( thread != NULL ) {
3247 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3248 } else {
3249 __kmp_printf( " - (nil)\n" );
3250 }; // if
3251}
3252
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003253void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003254__kmp_print_structure(
3255 void
3256) {
3257
3258 kmp_team_list_t list;
3259
3260 // Initialize list of teams.
3261 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3262 list->entry = NULL;
3263 list->next = NULL;
3264
3265 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3266 {
3267 int gtid;
3268 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3269 __kmp_printf( "%2d", gtid );
3270 if ( __kmp_threads != NULL ) {
3271 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3272 }; // if
3273 if ( __kmp_root != NULL ) {
3274 __kmp_printf( " %p", __kmp_root[ gtid ] );
3275 }; // if
3276 __kmp_printf( "\n" );
3277 }; // for gtid
3278 }
3279
3280 // Print out __kmp_threads array.
3281 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3282 if ( __kmp_threads != NULL ) {
3283 int gtid;
3284 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3285 kmp_info_t const * thread = __kmp_threads[ gtid ];
3286 if ( thread != NULL ) {
3287 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3288 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3289 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3290 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3291 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3292 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3293 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3294 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3295#if OMP_40_ENABLED
3296 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3297#endif
3298 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3299 __kmp_printf( "\n" );
3300 __kmp_print_structure_team_accum( list, thread->th.th_team );
3301 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3302 }; // if
3303 }; // for gtid
3304 } else {
3305 __kmp_printf( "Threads array is not allocated.\n" );
3306 }; // if
3307
3308 // Print out __kmp_root array.
3309 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3310 if ( __kmp_root != NULL ) {
3311 int gtid;
3312 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3313 kmp_root_t const * root = __kmp_root[ gtid ];
3314 if ( root != NULL ) {
3315 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3316 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3317 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3318 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3319 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3320 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3321 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3322 __kmp_printf( "\n" );
3323 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3324 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3325 }; // if
3326 }; // for gtid
3327 } else {
3328 __kmp_printf( "Ubers array is not allocated.\n" );
3329 }; // if
3330
3331 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3332 while ( list->next != NULL ) {
3333 kmp_team_p const * team = list->entry;
3334 int i;
3335 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3336 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3337 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3338 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3339 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3340 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3341 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3342 __kmp_printf( " Thread %2d: ", i );
3343 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3344 }; // for i
3345 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3346 __kmp_printf( "\n" );
3347 list = list->next;
3348 }; // while
3349
3350 // Print out __kmp_thread_pool and __kmp_team_pool.
3351 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3352 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3353 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3354 __kmp_printf( "\n" );
3355
3356 // Free team list.
3357 while ( list != NULL ) {
3358 kmp_team_list_item_t * item = list;
3359 list = list->next;
3360 KMP_INTERNAL_FREE( item );
3361 }; // while
3362
3363}
3364
3365#endif
3366
3367
3368//---------------------------------------------------------------------------
3369// Stuff for per-thread fast random number generator
3370// Table of primes
3371
3372static const unsigned __kmp_primes[] = {
3373 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3374 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3375 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3376 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3377 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3378 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3379 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3380 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3381 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3382 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3383 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3384 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3385 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3386 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3387 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3388 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3389};
3390
3391//---------------------------------------------------------------------------
3392// __kmp_get_random: Get a random number using a linear congruential method.
3393
3394unsigned short
3395__kmp_get_random( kmp_info_t * thread )
3396{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003397 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003398 unsigned short r = x>>16;
3399
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003400 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401
3402 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3403 thread->th.th_info.ds.ds_tid, r) );
3404
3405 return r;
3406}
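/* Editor's worked example (assuming 32-bit unsigned wrap-around): with multiplier
 * th_a = 0x9e3779b1 (first entry of __kmp_primes) and current state th_x = 1, the
 * update performed above is
 *
 *     x' = x * a + 1 (mod 2^32) = 0x9e3779b2
 *
 * while the value returned is the high half of the *old* state, x >> 16 == 0.
 * Returning the upper 16 bits is the usual trick for power-of-two-modulus LCGs,
 * whose low-order bits have much shorter periods. */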
3407//--------------------------------------------------------
3408// __kmp_init_random: Initialize a random number generator
3409
3410void
3411__kmp_init_random( kmp_info_t * thread )
3412{
3413 unsigned seed = thread->th.th_info.ds.ds_tid;
3414
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003415 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3416 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3417 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003418}
3419
3420
3421#if KMP_OS_WINDOWS
3422/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3423static int
3424__kmp_reclaim_dead_roots(void) {
3425 int i, r = 0;
3426
3427 for(i = 0; i < __kmp_threads_capacity; ++i) {
3428 if( KMP_UBER_GTID( i ) &&
3429 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3430 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
3431 r += __kmp_unregister_root_other_thread(i);
3432 }
3433 }
3434 return r;
3435}
3436#endif
3437
3438/*
3439 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3440 free entries generated.
3441
3442 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3443 already dead.
3444
3445 On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate
3446 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3447 __kmp_tp_capacity, if threadprivate cache array has been created.
3448 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3449
3450 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3451 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3452 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3453 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3454 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3455 as many free slots as possible up to nWish.
3456
3457 If any argument is negative, the behavior is undefined.
3458*/
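/* Editor's example (hypothetical numbers, to make the nWish/nNeed contract above
 * concrete): with __kmp_threads_capacity == 64 and headroom for only 40 more
 * entries,
 *
 *     __kmp_expand_threads(100, 20);   // wish for 100 free slots, need 20
 *
 * cannot satisfy nWish, falls back to nNeed, and grows the arrays (doubling,
 * clipped to the effective maximum) until at least 64 + 20 entries fit, returning
 * the number of slots actually added. __kmp_expand_threads(100, 0) would instead
 * make a best-effort expansion toward 100. */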
3459static int
3460__kmp_expand_threads(int nWish, int nNeed) {
3461 int added = 0;
3462 int old_tp_cached;
3463 int __kmp_actual_max_nth;
3464
3465 if(nNeed > nWish) /* normalize the arguments */
3466 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003467#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003468/* only for Windows static library */
3469 /* reclaim array entries for root threads that are already dead */
3470 added = __kmp_reclaim_dead_roots();
3471
3472 if(nNeed) {
3473 nNeed -= added;
3474 if(nNeed < 0)
3475 nNeed = 0;
3476 }
3477 if(nWish) {
3478 nWish -= added;
3479 if(nWish < 0)
3480 nWish = 0;
3481 }
3482#endif
3483 if(nWish <= 0)
3484 return added;
3485
3486 while(1) {
3487 int nTarget;
3488 int minimumRequiredCapacity;
3489 int newCapacity;
3490 kmp_info_t **newThreads;
3491 kmp_root_t **newRoot;
3492
3493 //
3494 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3495 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3496 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3497 // become > __kmp_max_nth in one of two ways:
3498 //
3499 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3500 // may not be resused by another thread, so we may need to increase
3501 // __kmp_threads_capacity to __kmp_max_threads + 1.
3502 //
3503 // 2) New foreign root(s) are encountered. We always register new
3504 // foreign roots. This may cause a smaller # of threads to be
3505 // allocated at subsequent parallel regions, but the worker threads
3506 // hang around (and eventually go to sleep) and need slots in the
3507 // __kmp_threads[] array.
3508 //
3509 // Anyway, that is the reason for moving the check to see if
3510 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3511 // instead of having it performed here. -BB
3512 //
3513 old_tp_cached = __kmp_tp_cached;
3514 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3515 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3516
3517 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3518 nTarget = nWish;
3519 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3520 /* can't fulfil nWish, so try nNeed */
3521 if(nNeed) {
3522 nTarget = nNeed;
3523 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3524 /* possible expansion too small -- give up */
3525 break;
3526 }
3527 } else {
3528 /* best-effort */
3529 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3530 if(!nTarget) {
3531 /* can't expand at all -- give up */
3532 break;
3533 }
3534 }
3535 }
3536 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3537
3538 newCapacity = __kmp_threads_capacity;
3539 do{
3540 newCapacity =
3541 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3542 (newCapacity << 1) :
3543 __kmp_actual_max_nth;
3544 } while(newCapacity < minimumRequiredCapacity);
3545 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3546 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003547 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3548 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003549 memset(newThreads + __kmp_threads_capacity, 0,
3550 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3551 memset(newRoot + __kmp_threads_capacity, 0,
3552 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3553
3554 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3555 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3556 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3557 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3558 of a double-check pair.
3559 */
3560 __kmp_free(newThreads);
3561 continue; /* start over and try again */
3562 }
3563 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3564 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3565 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3566 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3567 __kmp_free(newThreads);
3568 continue; /* start over and try again */
3569 } else {
3570 /* success */
3571 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated.
3572 //
3573 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3574 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3575 added += newCapacity - __kmp_threads_capacity;
3576 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3577 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003578 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003579 }
3580 }
3581 return added;
3582}
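/* Editor's note (design observation, hedged): the __kmp_tp_cached test before and
 * after taking __kmp_tp_cached_lock above is a double-checked pattern -- the cheap
 * unlocked test throws away the freshly built arrays early if a threadprivate cache
 * appeared mid-allocation, and the locked re-check makes the final publication of
 * the new capacity race-free against __kmpc_threadprivate_cached. In outline:
 *
 *     if (cache_appeared())  { free(new_arrays); retry; }    // unlocked check
 *     lock();
 *     if (cache_appeared())  { unlock(); free(new_arrays); retry; }
 *     publish(new_arrays); unlock();
 */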
3583
3584/* register the current thread as a root thread and obtain our gtid */
3585/* we must have the __kmp_initz_lock held at this point */
3586/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
3587int
3588__kmp_register_root( int initial_thread )
3589{
3590 kmp_info_t *root_thread;
3591 kmp_root_t *root;
3592 int gtid;
3593 int capacity;
3594 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3595 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3596 KMP_MB();
3597
3598
3599 /*
3600 2007-03-02:
3601
3602 If the initial thread has not invoked the OpenMP RTL yet, and this thread is not an initial one,
3603 the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
3604 return false (meaning there is at least one empty slot in the __kmp_threads array), but it
3605 is possible that the only free slot is #0, which is reserved for the initial thread and so cannot
3606 be used for this one. The following code works around this bug.
3607
3608 However, the right solution seems to be not to reserve slot #0 for the initial thread, because:
3609 (1) there is no magic in slot #0,
3610 (2) we cannot detect the initial thread reliably (the first thread which does the serial
3611 initialization may not be a real initial thread).
3612 */
3613 capacity = __kmp_threads_capacity;
3614 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3615 -- capacity;
3616 }; // if
3617
3618 /* see if there are too many threads */
3619 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3620 if ( __kmp_tp_cached ) {
3621 __kmp_msg(
3622 kmp_ms_fatal,
3623 KMP_MSG( CantRegisterNewThread ),
3624 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3625 KMP_HNT( PossibleSystemLimitOnThreads ),
3626 __kmp_msg_null
3627 );
3628 }
3629 else {
3630 __kmp_msg(
3631 kmp_ms_fatal,
3632 KMP_MSG( CantRegisterNewThread ),
3633 KMP_HNT( SystemLimitOnThreads ),
3634 __kmp_msg_null
3635 );
3636 }
3637 }; // if
3638
3639 /* find an available thread slot */
3640 /* Don't reassign the zero slot since we need that to only be used by initial
3641 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003642 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3643 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003644 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3645 KMP_ASSERT( gtid < __kmp_threads_capacity );
3646
3647 /* update global accounting */
3648 __kmp_all_nth ++;
3649 TCW_4(__kmp_nth, __kmp_nth + 1);
3650
3651 //
3652 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3653 // for low numbers of procs, and method #2 (keyed API call) for higher
3654 // numbers of procs.
3655 //
3656 if ( __kmp_adjust_gtid_mode ) {
3657 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3658 if ( TCR_4(__kmp_gtid_mode) != 2) {
3659 TCW_4(__kmp_gtid_mode, 2);
3660 }
3661 }
3662 else {
3663 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3664 TCW_4(__kmp_gtid_mode, 1);
3665 }
3666 }
3667 }
3668
3669#ifdef KMP_ADJUST_BLOCKTIME
3670 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003671 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003672 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3673 if ( __kmp_nth > __kmp_avail_proc ) {
3674 __kmp_zero_bt = TRUE;
3675 }
3676 }
3677#endif /* KMP_ADJUST_BLOCKTIME */
3678
3679 /* setup this new hierarchy */
3680 if( ! ( root = __kmp_root[gtid] )) {
3681 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3682 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3683 }
3684
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003685#if KMP_STATS_ENABLED
3686 // Initialize stats as soon as possible (right after gtid assignment).
3687 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3688 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3689 KMP_SET_THREAD_STATE(SERIAL_REGION);
3690 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3691#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003692 __kmp_initialize_root( root );
3693
3694 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003695 if( root->r.r_uber_thread ) {
3696 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003697 } else {
3698 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3699 if ( __kmp_storage_map ) {
3700 __kmp_print_thread_storage_map( root_thread, gtid );
3701 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003702 root_thread->th.th_info .ds.ds_gtid = gtid;
3703 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003704 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003705 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003706 }
3707 #if USE_FAST_MEMORY
3708 __kmp_initialize_fast_memory( root_thread );
3709 #endif /* USE_FAST_MEMORY */
3710
3711 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003712 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003713 __kmp_initialize_bget( root_thread );
3714 #endif
3715 __kmp_init_random( root_thread ); // Initialize random number generator
3716 }
3717
3718 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003719 if( ! root_thread->th.th_serial_team ) {
3720 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003721 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003722
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003723 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003724#if OMPT_SUPPORT
3725 0, // root parallel id
3726#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003727#if OMP_40_ENABLED
3728 proc_bind_default,
3729#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003730 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003731 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003732 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003733 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003734 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003735 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003736
3737 /* drop root_thread into place */
3738 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3739
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003740 root->r.r_root_team->t.t_threads[0] = root_thread;
3741 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3742 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
 3743 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
3744 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003745
3746 /* initialize the thread, get it ready to go */
3747 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003748 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003749
3750 /* prepare the master thread for get_gtid() */
3751 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003752
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003753#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003754 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003755#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003756
Jim Cownie5e8470a2013-09-27 10:38:44 +00003757 #ifdef KMP_TDATA_GTID
3758 __kmp_gtid = gtid;
3759 #endif
3760 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3761 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003762
3763 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3764 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003765 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003766 KMP_INIT_BARRIER_STATE ) );
3767 { // Initialize barrier data.
3768 int b;
3769 for ( b = 0; b < bs_last_barrier; ++ b ) {
3770 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003771#if USE_DEBUGGER
3772 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3773#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003774 }; // for
3775 }
3776 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3777
Alp Toker763b9392014-02-28 09:42:41 +00003778#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003779# if OMP_40_ENABLED
3780 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3781 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3782 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3783 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3784# endif
3785
Jim Cownie5e8470a2013-09-27 10:38:44 +00003786 if ( TCR_4(__kmp_init_middle) ) {
3787 __kmp_affinity_set_init_mask( gtid, TRUE );
3788 }
Alp Toker763b9392014-02-28 09:42:41 +00003789#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003790
3791 __kmp_root_counter ++;
3792
3793 KMP_MB();
3794 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3795
3796 return gtid;
3797}
3798
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003799#if KMP_NESTED_HOT_TEAMS
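// Recursively frees the nested hot teams (and their worker threads) reachable
// from 'thr' at nesting depth 'level', descending into deeper levels first.
// The master thread at each level is neither counted nor freed here. Returns
// the number of __kmp_threads entries released.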
3800static int
3801__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3802{
3803 int i, n, nth;
3804 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3805 if( !hot_teams || !hot_teams[level].hot_team ) {
3806 return 0;
3807 }
3808 KMP_DEBUG_ASSERT( level < max_level );
3809 kmp_team_t *team = hot_teams[level].hot_team;
3810 nth = hot_teams[level].hot_team_nth;
3811 n = nth - 1; // master is not freed
3812 if( level < max_level - 1 ) {
3813 for( i = 0; i < nth; ++i ) {
3814 kmp_info_t *th = team->t.t_threads[i];
3815 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3816 if( i > 0 && th->th.th_hot_teams ) {
3817 __kmp_free( th->th.th_hot_teams );
3818 th->th.th_hot_teams = NULL;
3819 }
3820 }
3821 }
3822 __kmp_free_team( root, team, NULL );
3823 return n;
3824}
3825#endif
3826
Jim Cownie5e8470a2013-09-27 10:38:44 +00003827/* Resets a root thread and clears its root and hot teams.
3828 Returns the number of __kmp_threads entries directly and indirectly freed.
3829*/
3830static int
3831__kmp_reset_root(int gtid, kmp_root_t *root)
3832{
3833 kmp_team_t * root_team = root->r.r_root_team;
3834 kmp_team_t * hot_team = root->r.r_hot_team;
3835 int n = hot_team->t.t_nproc;
3836 int i;
3837
3838 KMP_DEBUG_ASSERT( ! root->r.r_active );
3839
3840 root->r.r_root_team = NULL;
3841 root->r.r_hot_team = NULL;
 3842 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before calling
 3843 // __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003844 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3845#if KMP_NESTED_HOT_TEAMS
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003846 if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003847 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3848 kmp_info_t *th = hot_team->t.t_threads[i];
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003849 if( __kmp_hot_teams_max_level > 1 ) {
3850 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3851 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003852 if( th->th.th_hot_teams ) {
3853 __kmp_free( th->th.th_hot_teams );
3854 th->th.th_hot_teams = NULL;
3855 }
3856 }
3857 }
3858#endif
3859 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003860
Jim Cownie5e8470a2013-09-27 10:38:44 +00003861 //
3862 // Before we can reap the thread, we need to make certain that all
3863 // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
3864 //
3865 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3866 __kmp_wait_to_unref_task_teams();
3867 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003868
3869 #if KMP_OS_WINDOWS
3870 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3871 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3872 (LPVOID)&(root->r.r_uber_thread->th),
3873 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3874 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3875 #endif /* KMP_OS_WINDOWS */
3876
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003877#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003878 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003879 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3880 int gtid = __kmp_get_gtid();
3881 __ompt_thread_end(ompt_thread_initial, gtid);
3882 }
3883#endif
3884
Jim Cownie5e8470a2013-09-27 10:38:44 +00003885 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3886 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3887
 3888 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3889 root->r.r_uber_thread = NULL;
3890 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003891 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003892
3893 return n;
3894}
3895
3896void
3897__kmp_unregister_root_current_thread( int gtid )
3898{
Jim Cownie77c2a632014-09-03 11:34:33 +00003899 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003900 /* this lock should be ok, since unregister_root_current_thread is never called during
 3901 * an abort, only during a normal close. furthermore, if you have the
3902 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003903
3904 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3905 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3906 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3907 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3908 return;
3909 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003910 kmp_root_t *root = __kmp_root[gtid];
3911
Jim Cownie5e8470a2013-09-27 10:38:44 +00003912 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3913 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3914 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3915 KMP_ASSERT( root->r.r_active == FALSE );
3916
Jim Cownie5e8470a2013-09-27 10:38:44 +00003917
3918 KMP_MB();
3919
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003920#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003921 kmp_info_t * thread = __kmp_threads[gtid];
3922 kmp_team_t * team = thread->th.th_team;
3923 kmp_task_team_t * task_team = thread->th.th_task_team;
3924
3925 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003926 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3927#if OMPT_SUPPORT
3928 // the runtime is shutting down so we won't report any events
3929 thread->th.ompt_thread_info.state = ompt_state_undefined;
3930#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003931 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003932 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003933#endif
3934
Jim Cownie5e8470a2013-09-27 10:38:44 +00003935 __kmp_reset_root(gtid, root);
3936
3937 /* free up this thread slot */
3938 __kmp_gtid_set_specific( KMP_GTID_DNE );
3939#ifdef KMP_TDATA_GTID
3940 __kmp_gtid = KMP_GTID_DNE;
3941#endif
3942
3943 KMP_MB();
3944 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3945
3946 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3947}
3948
Jonathan Peyton2321d572015-06-08 19:25:25 +00003949#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003950/* __kmp_forkjoin_lock must be already held
3951 Unregisters a root thread that is not the current thread. Returns the number of
3952 __kmp_threads entries freed as a result.
3953 */
3954static int
3955__kmp_unregister_root_other_thread( int gtid )
3956{
3957 kmp_root_t *root = __kmp_root[gtid];
3958 int r;
3959
3960 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3961 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3962 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3963 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3964 KMP_ASSERT( root->r.r_active == FALSE );
3965
3966 r = __kmp_reset_root(gtid, root);
3967 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3968 return r;
3969}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003970#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003971
Jim Cownie5e8470a2013-09-27 10:38:44 +00003972#if KMP_DEBUG
3973void __kmp_task_info() {
3974
3975 kmp_int32 gtid = __kmp_entry_gtid();
3976 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3977 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003978 kmp_team_t *steam = this_thr->th.th_serial_team;
3979 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003980
3981 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3982 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3983}
3984#endif // KMP_DEBUG
3985
Jim Cownie5e8470a2013-09-27 10:38:44 +00003986/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003987 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003988 * initialization of features as much as possible */
3989static void
3990__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3991{
3992 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3993 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003994 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003995 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003996 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003997 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003998 KMP_DEBUG_ASSERT( team->t.t_threads );
3999 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4000 KMP_DEBUG_ASSERT( master );
4001 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004002
4003 KMP_MB();
4004
4005 TCW_SYNC_PTR(this_thr->th.th_team, team);
4006
4007 this_thr->th.th_info.ds.ds_tid = tid;
4008 this_thr->th.th_set_nproc = 0;
Andrey Churbanov581490e2017-02-06 18:53:32 +00004009 if (__kmp_tasking_mode != tskm_immediate_exec)
4010 // When tasking is possible, threads are not safe to reap until they are
4011 // done tasking; this will be set when tasking code is exited in wait
4012 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4013 else // no tasking --> always safe to reap
4014 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004015#if OMP_40_ENABLED
4016 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00004017# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004018 this_thr->th.th_new_place = this_thr->th.th_current_place;
4019# endif
4020#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004021 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004022
4023 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004024 this_thr->th.th_team_nproc = team->t.t_nproc;
4025 this_thr->th.th_team_master = master;
4026 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004027 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4028
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004029 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004030
4031 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4032 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4033
4034 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4035
4036 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4037 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4038 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004039
4040 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004041 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004042
4043 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004044
4045#ifdef BUILD_TV
4046 this_thr->th.th_local.tv_data = 0;
4047#endif
4048
4049 if ( ! this_thr->th.th_pri_common ) {
4050 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4051 if ( __kmp_storage_map ) {
4052 __kmp_print_storage_map_gtid(
4053 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4054 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4055 );
4056 }; // if
4057 this_thr->th.th_pri_head = NULL;
4058 }; // if
4059
4060 /* Initialize dynamic dispatch */
4061 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004062 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004063 /*
4064 * Use team max_nproc since this will never change for the team.
4065 */
4066 size_t disp_size = sizeof( dispatch_private_info_t ) *
Jonathan Peyton067325f2016-05-31 19:01:15 +00004067 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
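        // A serialized team (t_max_nproc == 1) only ever needs a single dispatch
        // buffer; otherwise a ring of __kmp_dispatch_num_buffers buffers is sized
        // so successive worksharing constructs can rotate through buffers.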
Jim Cownie5e8470a2013-09-27 10:38:44 +00004068 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4069 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004070 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004071 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4072
4073 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004074#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00004075 dispatch->th_doacross_buf_idx = 0;
4076#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004077 if( ! dispatch->th_disp_buffer ) {
4078 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004079
4080 if ( __kmp_storage_map ) {
4081 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
Jonathan Peyton067325f2016-05-31 19:01:15 +00004082 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
Jim Cownie5e8470a2013-09-27 10:38:44 +00004083 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4084 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4085 gtid, team->t.t_id, gtid );
4086 }
4087 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004088 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004089 }
4090
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004091 dispatch->th_dispatch_pr_current = 0;
4092 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004093
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004094 dispatch->th_deo_fcn = 0; /* ORDERED */
4095 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004096 }
4097
4098 this_thr->th.th_next_pool = NULL;
4099
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004100 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004101 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004102 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4103 this_thr->th.th_task_state_top = 0;
4104 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004105 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4106 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004107 }
4108
Jim Cownie5e8470a2013-09-27 10:38:44 +00004109 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4110 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4111
4112 KMP_MB();
4113}
4114
4115
4116/* allocate a new thread for the requesting team. this is only called from within a
4117 * forkjoin critical section. we will first try to get an available thread from the
 4118 * thread pool. if none is available, we will fork a new one, assuming we are
 4119 * able to create one. this should be assured, as the caller should have checked
 4120 * this first.
4121 */
4122kmp_info_t *
4123__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4124{
4125 kmp_team_t *serial_team;
4126 kmp_info_t *new_thr;
4127 int new_gtid;
4128
4129 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4130 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004131#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004132 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004133#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004134 KMP_MB();
4135
4136 /* first, try to get one from the thread pool */
4137 if ( __kmp_thread_pool ) {
4138
4139 new_thr = (kmp_info_t*)__kmp_thread_pool;
4140 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4141 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4142 __kmp_thread_pool_insert_pt = NULL;
4143 }
4144 TCW_4(new_thr->th.th_in_pool, FALSE);
4145 //
4146 // Don't touch th_active_in_pool or th_active.
4147 // The worker thread adjusts those flags as it sleeps/awakens.
4148 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004149 __kmp_thread_pool_nth--;
4150
4151 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4152 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004153 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004154 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4155 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4156
4157 /* setup the thread structure */
4158 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4159 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4160
4161 TCW_4(__kmp_nth, __kmp_nth + 1);
4162
Jonathan Peyton54127982015-11-04 21:37:48 +00004163 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004164 new_thr->th.th_task_state_top = 0;
4165 new_thr->th.th_task_state_stack_sz = 4;
4166
Jim Cownie5e8470a2013-09-27 10:38:44 +00004167#ifdef KMP_ADJUST_BLOCKTIME
 4168        /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004169 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004170 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4171 if ( __kmp_nth > __kmp_avail_proc ) {
4172 __kmp_zero_bt = TRUE;
4173 }
4174 }
4175#endif /* KMP_ADJUST_BLOCKTIME */
4176
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004177#if KMP_DEBUG
4178 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
4179 int b;
4180 kmp_balign_t * balign = new_thr->th.th_bar;
4181 for( b = 0; b < bs_last_barrier; ++ b )
4182 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4183#endif
4184
Jim Cownie5e8470a2013-09-27 10:38:44 +00004185 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4186 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4187
4188 KMP_MB();
4189 return new_thr;
4190 }
4191
4192
 4193    /* no, we'll fork a new one */
4194 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4195 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4196
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004197#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00004198 //
4199 // If this is the first worker thread the RTL is creating, then also
4200 // launch the monitor thread. We try to do this as early as possible.
4201 //
4202 if ( ! TCR_4( __kmp_init_monitor ) ) {
4203 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4204 if ( ! TCR_4( __kmp_init_monitor ) ) {
4205 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4206 TCW_4( __kmp_init_monitor, 1 );
4207 __kmp_create_monitor( & __kmp_monitor );
4208 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004209 #if KMP_OS_WINDOWS
4210 // AC: wait until monitor has started. This is a fix for CQ232808.
4211 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
 4212                 // work in between, then there is a high probability that the monitor thread starts only after
 4213                 // the library shutdown. At shutdown it is too late to cope with the problem, because
 4214                 // when the master is in DllMain (process detach) the monitor has no chance to start
 4215                 // (it is blocked), and the master has no means to inform the monitor that the library has gone,
4216 // because all the memory which the monitor can access is going to be released/reset.
4217 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4218 KMP_YIELD( TRUE );
4219 }
4220 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4221 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004222 }
4223 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4224 }
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004225#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004226
4227 KMP_MB();
4228 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4229 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4230 }
4231
4232 /* allocate space for it. */
4233 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4234
4235 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4236
4237 if ( __kmp_storage_map ) {
4238 __kmp_print_thread_storage_map( new_thr, new_gtid );
4239 }
4240
4241 /* add the reserve serialized team, initialized from the team's master thread */
4242 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004243 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004244 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004245
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004246 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004247 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004248#if OMPT_SUPPORT
4249 0, // root parallel id
4250#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004251#if OMP_40_ENABLED
4252 proc_bind_default,
4253#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004254 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004255 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004256 }
4257 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004258    serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
4259 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004260 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4261 new_thr ) );
4262
4263 /* setup the thread structures */
4264 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4265
4266 #if USE_FAST_MEMORY
4267 __kmp_initialize_fast_memory( new_thr );
4268 #endif /* USE_FAST_MEMORY */
4269
4270 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004271 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004272 __kmp_initialize_bget( new_thr );
4273 #endif
4274
4275 __kmp_init_random( new_thr ); // Initialize random number generator
4276
4277 /* Initialize these only once when thread is grabbed for a team allocation */
4278 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4279 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4280
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004281 int b;
4282 kmp_balign_t * balign = new_thr->th.th_bar;
4283 for(b=0; b<bs_last_barrier; ++b) {
4284 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4285 balign[b].bb.team = NULL;
4286 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4287 balign[b].bb.use_oncore_barrier = 0;
4288 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004289
4290 new_thr->th.th_spin_here = FALSE;
4291 new_thr->th.th_next_waiting = 0;
4292
Alp Toker98758b02014-03-02 04:12:06 +00004293#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004294 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4295 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4296 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4297 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4298#endif
4299
4300 TCW_4(new_thr->th.th_in_pool, FALSE);
4301 new_thr->th.th_active_in_pool = FALSE;
4302 TCW_4(new_thr->th.th_active, TRUE);
4303
4304 /* adjust the global counters */
4305 __kmp_all_nth ++;
4306 __kmp_nth ++;
4307
4308 //
4309 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4310 // for low numbers of procs, and method #2 (keyed API call) for higher
4311 // numbers of procs.
4312 //
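    // Concretely: once __kmp_all_nth reaches __kmp_tls_gtid_min, switch to mode 2
    // (keyed API lookup); below that threshold fall back to mode 1 (stack-address
    // search). The same switch is made in __kmp_register_root above.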
4313 if ( __kmp_adjust_gtid_mode ) {
4314 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4315 if ( TCR_4(__kmp_gtid_mode) != 2) {
4316 TCW_4(__kmp_gtid_mode, 2);
4317 }
4318 }
4319 else {
4320 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4321 TCW_4(__kmp_gtid_mode, 1);
4322 }
4323 }
4324 }
4325
4326#ifdef KMP_ADJUST_BLOCKTIME
4327 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004328 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004329 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4330 if ( __kmp_nth > __kmp_avail_proc ) {
4331 __kmp_zero_bt = TRUE;
4332 }
4333 }
4334#endif /* KMP_ADJUST_BLOCKTIME */
4335
4336 /* actually fork it and create the new worker thread */
4337 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4338 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4339 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4340
Jim Cownie5e8470a2013-09-27 10:38:44 +00004341 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4342 KMP_MB();
4343 return new_thr;
4344}
4345
4346/*
4347 * reinitialize team for reuse.
4348 *
 4349 * The hot team code calls this routine at every fork barrier, so EPCC barrier
 4350 * tests are extremely sensitive to changes in it, esp. writes to the team
4351 * struct, which cause a cache invalidation in all threads.
4352 *
4353 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4354 */
4355static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004356__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004357 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4358 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004359 KMP_DEBUG_ASSERT( team && new_icvs);
4360 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004361 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004362
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004363 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004364
Jim Cownie181b4bb2013-12-23 17:28:57 +00004365 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004366 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004367 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004368
4369 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4370 team->t.t_threads[0], team ) );
4371}
4372
Jim Cownie5e8470a2013-09-27 10:38:44 +00004373
4374/* initialize the team data structure
4375 * this assumes the t_threads and t_max_nproc are already set
4376 * also, we don't touch the arguments */
4377static void
4378__kmp_initialize_team(
4379 kmp_team_t * team,
4380 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004381 kmp_internal_control_t * new_icvs,
4382 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004383) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004384 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4385
Jim Cownie5e8470a2013-09-27 10:38:44 +00004386 /* verify */
4387 KMP_DEBUG_ASSERT( team );
4388 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4389 KMP_DEBUG_ASSERT( team->t.t_threads );
4390 KMP_MB();
4391
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004392 team->t.t_master_tid = 0; /* not needed */
4393 /* team->t.t_master_bar; not needed */
4394 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4395 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004396
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004397 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4398 team->t.t_next_pool = NULL;
4399 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004400
4401 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004402 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004403
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004404 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4405 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004406
4407#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004408 team->t.t_fp_control_saved = FALSE; /* not needed */
4409 team->t.t_x87_fpu_control_word = 0; /* not needed */
4410 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004411#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4412
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004413 team->t.t_construct = 0;
4414 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004415
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004416 team->t.t_ordered .dt.t_value = 0;
4417 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004418
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004419 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004420
4421#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004422 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004423#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004424 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004425
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004426 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004427
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004428 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004429
4430 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004431 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004432}
4433
Alp Toker98758b02014-03-02 04:12:06 +00004434#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004435/* Sets full mask for thread and returns old mask, no changes to structures. */
4436static void
4437__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4438{
4439 if ( KMP_AFFINITY_CAPABLE() ) {
4440 int status;
4441 if ( old_mask != NULL ) {
4442 status = __kmp_get_system_affinity( old_mask, TRUE );
4443 int error = errno;
4444 if ( status != 0 ) {
4445 __kmp_msg(
4446 kmp_ms_fatal,
4447 KMP_MSG( ChangeThreadAffMaskError ),
4448 KMP_ERR( error ),
4449 __kmp_msg_null
4450 );
4451 }
4452 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004453 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004454 }
4455}
4456#endif
4457
Alp Toker98758b02014-03-02 04:12:06 +00004458#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004459
4460//
4461// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
 4462// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004463// thread's partition, and binds each worker to a place in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004464// The master thread's partition should already include its current binding.
4465//
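// Illustrative example (not from the source): under proc_bind_close, if the
// master's partition is places [2..5] and the team has 4 threads, the master
// keeps its current place and the 3 workers are bound to the following places
// in the partition, wrapping from place 5 back to place 2 when necessary.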
4466static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004467__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004468{
4469 //
 4470    // Copy the master thread's place partition to the team struct
4471 //
4472 kmp_info_t *master_th = team->t.t_threads[0];
4473 KMP_DEBUG_ASSERT( master_th != NULL );
4474 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4475 int first_place = master_th->th.th_first_place;
4476 int last_place = master_th->th.th_last_place;
4477 int masters_place = master_th->th.th_current_place;
4478 team->t.t_first_place = first_place;
4479 team->t.t_last_place = last_place;
4480
4481 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4482 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4483 masters_place, first_place, last_place ) );
4484
4485 switch ( proc_bind ) {
4486
4487 case proc_bind_default:
4488 //
4489 // serial teams might have the proc_bind policy set to
4490 // proc_bind_default. It doesn't matter, as we don't
4491 // rebind the master thread for any proc_bind policy.
4492 //
4493 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4494 break;
4495
4496 case proc_bind_master:
4497 {
4498 int f;
4499 int n_th = team->t.t_nproc;
4500 for ( f = 1; f < n_th; f++ ) {
4501 kmp_info_t *th = team->t.t_threads[f];
4502 KMP_DEBUG_ASSERT( th != NULL );
4503 th->th.th_first_place = first_place;
4504 th->th.th_last_place = last_place;
4505 th->th.th_new_place = masters_place;
4506
4507 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4508 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4509 team->t.t_id, f, masters_place, first_place, last_place ) );
4510 }
4511 }
4512 break;
4513
4514 case proc_bind_close:
4515 {
4516 int f;
4517 int n_th = team->t.t_nproc;
4518 int n_places;
4519 if ( first_place <= last_place ) {
4520 n_places = last_place - first_place + 1;
4521 }
4522 else {
4523 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4524 }
4525 if ( n_th <= n_places ) {
4526 int place = masters_place;
4527 for ( f = 1; f < n_th; f++ ) {
4528 kmp_info_t *th = team->t.t_threads[f];
4529 KMP_DEBUG_ASSERT( th != NULL );
4530
4531 if ( place == last_place ) {
4532 place = first_place;
4533 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004534 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004535 place = 0;
4536 }
4537 else {
4538 place++;
4539 }
4540 th->th.th_first_place = first_place;
4541 th->th.th_last_place = last_place;
4542 th->th.th_new_place = place;
4543
4544 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4545 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4546 team->t.t_id, f, place, first_place, last_place ) );
4547 }
4548 }
4549 else {
4550 int S, rem, gap, s_count;
4551 S = n_th / n_places;
4552 s_count = 0;
4553 rem = n_th - ( S * n_places );
4554 gap = rem > 0 ? n_places/rem : n_places;
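                // Worked example (illustrative): n_th = 10 threads over n_places = 4
                // places gives S = 2 threads per place, with rem = 2 places taking one
                // extra thread, chosen every gap = 2 places.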
4555 int place = masters_place;
4556 int gap_ct = gap;
4557 for ( f = 0; f < n_th; f++ ) {
4558 kmp_info_t *th = team->t.t_threads[f];
4559 KMP_DEBUG_ASSERT( th != NULL );
4560
4561 th->th.th_first_place = first_place;
4562 th->th.th_last_place = last_place;
4563 th->th.th_new_place = place;
4564 s_count++;
4565
4566 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4567 // do nothing, add an extra thread to place on next iteration
4568 }
4569 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4570 // we added an extra thread to this place; move to next place
4571 if ( place == last_place ) {
4572 place = first_place;
4573 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004574 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004575 place = 0;
4576 }
4577 else {
4578 place++;
4579 }
4580 s_count = 0;
4581 gap_ct = 1;
4582 rem--;
4583 }
4584 else if (s_count == S) { // place full; don't add extra
4585 if ( place == last_place ) {
4586 place = first_place;
4587 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004588 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004589 place = 0;
4590 }
4591 else {
4592 place++;
4593 }
4594 gap_ct++;
4595 s_count = 0;
4596 }
4597
4598 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4599 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4600 team->t.t_id, f, th->th.th_new_place, first_place,
4601 last_place ) );
4602 }
4603 KMP_DEBUG_ASSERT( place == masters_place );
4604 }
4605 }
4606 break;
4607
4608 case proc_bind_spread:
4609 {
4610 int f;
4611 int n_th = team->t.t_nproc;
4612 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004613 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004614 if ( first_place <= last_place ) {
4615 n_places = last_place - first_place + 1;
4616 }
4617 else {
4618 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4619 }
4620 if ( n_th <= n_places ) {
4621 int place = masters_place;
4622 int S = n_places/n_th;
4623 int s_count, rem, gap, gap_ct;
4624 rem = n_places - n_th*S;
4625 gap = rem ? n_th/rem : 1;
4626 gap_ct = gap;
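                 // Worked example (illustrative): spreading n_th = 3 threads over
                 // n_places = 8 places gives each thread a sub-partition of S = 2
                 // places, with rem = 2 threads receiving one extra place (gap = 1).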
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004627 thidx = n_th;
4628 if (update_master_only == 1)
4629 thidx = 1;
4630 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004631 kmp_info_t *th = team->t.t_threads[f];
4632 KMP_DEBUG_ASSERT( th != NULL );
4633
4634 th->th.th_first_place = place;
4635 th->th.th_new_place = place;
4636 s_count = 1;
4637 while (s_count < S) {
4638 if ( place == last_place ) {
4639 place = first_place;
4640 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004641 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004642 place = 0;
4643 }
4644 else {
4645 place++;
4646 }
4647 s_count++;
4648 }
4649 if (rem && (gap_ct == gap)) {
4650 if ( place == last_place ) {
4651 place = first_place;
4652 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004653 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004654 place = 0;
4655 }
4656 else {
4657 place++;
4658 }
4659 rem--;
4660 gap_ct = 0;
4661 }
4662 th->th.th_last_place = place;
4663 gap_ct++;
4664
4665 if ( place == last_place ) {
4666 place = first_place;
4667 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004668 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004669 place = 0;
4670 }
4671 else {
4672 place++;
4673 }
4674
4675 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4676 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4677 team->t.t_id, f, th->th.th_new_place,
4678 th->th.th_first_place, th->th.th_last_place ) );
4679 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004680 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004681 }
4682 else {
4683 int S, rem, gap, s_count;
4684 S = n_th / n_places;
4685 s_count = 0;
4686 rem = n_th - ( S * n_places );
4687 gap = rem > 0 ? n_places/rem : n_places;
4688 int place = masters_place;
4689 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004690 thidx = n_th;
4691 if (update_master_only == 1)
4692 thidx = 1;
4693 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004694 kmp_info_t *th = team->t.t_threads[f];
4695 KMP_DEBUG_ASSERT( th != NULL );
4696
4697 th->th.th_first_place = place;
4698 th->th.th_last_place = place;
4699 th->th.th_new_place = place;
4700 s_count++;
4701
4702 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4703 // do nothing, add an extra thread to place on next iteration
4704 }
4705 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4706 // we added an extra thread to this place; move on to next place
4707 if ( place == last_place ) {
4708 place = first_place;
4709 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004710 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004711 place = 0;
4712 }
4713 else {
4714 place++;
4715 }
4716 s_count = 0;
4717 gap_ct = 1;
4718 rem--;
4719 }
4720 else if (s_count == S) { // place is full; don't add extra thread
4721 if ( place == last_place ) {
4722 place = first_place;
4723 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004724 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004725 place = 0;
4726 }
4727 else {
4728 place++;
4729 }
4730 gap_ct++;
4731 s_count = 0;
4732 }
4733
4734 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4735 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4736 team->t.t_id, f, th->th.th_new_place,
4737 th->th.th_first_place, th->th.th_last_place) );
4738 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004739 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004740 }
4741 }
4742 break;
4743
4744 default:
4745 break;
4746 }
4747
4748 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4749}
4750
Alp Toker98758b02014-03-02 04:12:06 +00004751#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004752
4753/* allocate a new team data structure to use. take one off of the free pool if available */
4754kmp_team_t *
4755__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004756#if OMPT_SUPPORT
4757 ompt_parallel_id_t ompt_parallel_id,
4758#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004759#if OMP_40_ENABLED
4760 kmp_proc_bind_t new_proc_bind,
4761#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004762 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004763 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004764{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00004765 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004766 int f;
4767 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004768 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004769 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004770
4771 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4772 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4773 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4774 KMP_MB();
4775
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004776#if KMP_NESTED_HOT_TEAMS
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004777 kmp_hot_team_ptr_t *hot_teams;
4778 if( master ) {
4779 team = master->th.th_team;
4780 level = team->t.t_active_level;
4781 if( master->th.th_teams_microtask ) { // in teams construct?
4782 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4783 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4784 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
 4785                 ++level; // do not increment if #teams==1 or for the outer fork of the teams; increment otherwise
4786 }
4787 }
4788 hot_teams = master->th.th_hot_teams;
4789 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4790 { // hot team has already been allocated for given level
4791 use_hot_team = 1;
4792 } else {
4793 use_hot_team = 0;
4794 }
4795 }
4796#endif
4797 // Optimization to use a "hot" team
4798 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004799 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004800#if KMP_NESTED_HOT_TEAMS
4801 team = hot_teams[level].hot_team;
4802#else
4803 team = root->r.r_hot_team;
4804#endif
4805#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004806 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004807 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4808 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004809 }
4810#endif
4811
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004812 // Has the number of threads changed?
4813 /* Let's assume the most common case is that the number of threads is unchanged, and
4814 put that case first. */
4815 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4816 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004817 // This case can mean that omp_set_num_threads() was called and the hot team size
4818 // was already reduced, so we check the special flag
4819 if ( team->t.t_size_changed == -1 ) {
4820 team->t.t_size_changed = 1;
4821 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004822 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004823 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004824
4825 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004826 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004827 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4828 team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004829 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004830
4831 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4832
4833 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4834 0, team->t.t_threads[0], team ) );
4835 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4836
4837#if OMP_40_ENABLED
4838# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004839 if ( ( team->t.t_size_changed == 0 )
4840 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004841 if (new_proc_bind == proc_bind_spread) {
4842 __kmp_partition_places(team, 1); // add flag to update only master for spread
4843 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004844 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4845 team->t.t_id, new_proc_bind, team->t.t_first_place,
4846 team->t.t_last_place ) );
4847 }
4848 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004849 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004850 __kmp_partition_places( team );
4851 }
4852# else
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004853 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004854# endif /* KMP_AFFINITY_SUPPORTED */
4855#endif /* OMP_40_ENABLED */
4856 }
4857 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004858 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4859
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004860 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004861#if KMP_NESTED_HOT_TEAMS
4862 if( __kmp_hot_teams_mode == 0 ) {
4863 // AC: saved number of threads should correspond to team's value in this mode,
4864 // can be bigger in mode 1, when hot team has some threads in reserve
4865 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4866 hot_teams[level].hot_team_nth = new_nproc;
4867#endif // KMP_NESTED_HOT_TEAMS
4868 /* release the extra threads we don't need any more */
4869 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4870 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004871 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4872 // When decreasing team size, threads no longer in the team should unref task team.
4873 team->t.t_threads[f]->th.th_task_team = NULL;
4874 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004875 __kmp_free_thread( team->t.t_threads[ f ] );
4876 team->t.t_threads[ f ] = NULL;
4877 }
4878#if KMP_NESTED_HOT_TEAMS
4879 } // (__kmp_hot_teams_mode == 0)
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004880 else {
4881 // When keeping extra threads in team, switch threads to wait on own b_go flag
4882 for (f=new_nproc; f<team->t.t_nproc; ++f) {
4883 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4884 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4885 for (int b=0; b<bs_last_barrier; ++b) {
4886 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4887 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4888 }
4889 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4890 }
4891 }
4892 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004893#endif // KMP_NESTED_HOT_TEAMS
4894 team->t.t_nproc = new_nproc;
4895 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004896 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4897 team->t.t_sched.chunk != new_icvs->sched.chunk)
4898 team->t.t_sched = new_icvs->sched;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004899 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004900
Jim Cownie5e8470a2013-09-27 10:38:44 +00004901 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004902 for(f = 0; f < new_nproc; ++f) {
4903 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004904 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004905 // restore the current task state of the master thread: should be the implicit task
4906 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4907 0, team->t.t_threads[0], team ) );
4908
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004909 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004910
4911#ifdef KMP_DEBUG
4912 for ( f = 0; f < team->t.t_nproc; f++ ) {
4913 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4914 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4915 }
4916#endif
4917
4918#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004919 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00004920# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004921 __kmp_partition_places( team );
4922# endif
4923#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004924 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004925 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004926#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004927 kmp_affin_mask_t *old_mask;
4928 if ( KMP_AFFINITY_CAPABLE() ) {
4929 KMP_CPU_ALLOC(old_mask);
4930 }
4931#endif
4932
4933 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4934
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004935 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004936
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004937#if KMP_NESTED_HOT_TEAMS
4938 int avail_threads = hot_teams[level].hot_team_nth;
4939 if( new_nproc < avail_threads )
4940 avail_threads = new_nproc;
4941 kmp_info_t **other_threads = team->t.t_threads;
4942 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4943 // Adjust barrier data of reserved threads (if any) of the team
4944 // Other data will be set in __kmp_initialize_info() below.
4945 int b;
4946 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4947 for ( b = 0; b < bs_last_barrier; ++ b ) {
4948 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4949 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004950#if USE_DEBUGGER
4951 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4952#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004953 }
4954 }
4955 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4956 // we have all needed threads in reserve, no need to allocate any
 4957            // this is only possible in mode 1; we cannot have reserved threads in mode 0
4958 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4959 team->t.t_nproc = new_nproc; // just get reserved threads involved
4960 } else {
4961 // we may have some threads in reserve, but not enough
4962 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4963 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4964#endif // KMP_NESTED_HOT_TEAMS
4965 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004966 /* reallocate larger arrays */
4967 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004968 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004969 }
4970
Alp Toker98758b02014-03-02 04:12:06 +00004971#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004972 /* Temporarily set full mask for master thread before
4973 creation of workers. The reason is that workers inherit
4974 the affinity from master, so if a lot of workers are
 4975               created on a single core quickly, they don't get
4976 a chance to set their own affinity for a long time.
4977 */
4978 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4979#endif
4980
4981 /* allocate new threads for the hot team */
4982 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4983 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4984 KMP_DEBUG_ASSERT( new_worker );
4985 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004986
Jonathan Peytond26e2132015-09-10 18:44:30 +00004987 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004988 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4989 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4990 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4991
4992 { // Initialize barrier data for new threads.
4993 int b;
4994 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004995 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004996 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004997 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004998#if USE_DEBUGGER
4999 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5000#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005001 }
5002 }
5003 }
5004
Alp Toker98758b02014-03-02 04:12:06 +00005005#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005006 if ( KMP_AFFINITY_CAPABLE() ) {
5007 /* Restore initial master thread's affinity mask */
5008 __kmp_set_system_affinity( old_mask, TRUE );
5009 KMP_CPU_FREE(old_mask);
5010 }
5011#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005012#if KMP_NESTED_HOT_TEAMS
5013 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5014#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005015        /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00005016 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005017 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005018
Jonathan Peytone03b62f2015-10-08 18:49:40 +00005019 /* reinitialize the threads */
5020 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00005021 for (f=0; f < team->t.t_nproc; ++f)
5022 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5023 if (level) { // set th_task_state for new threads in nested hot team
5024 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00005025 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00005026 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
5027 for (f=old_nproc; f < team->t.t_nproc; ++f)
5028 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005029 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005030 else { // set th_task_state for new threads in non-nested hot team
5031 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
5032 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005033 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005034 }
5035
Jim Cownie5e8470a2013-09-27 10:38:44 +00005036#ifdef KMP_DEBUG
5037 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5038 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5039 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5040 }
5041#endif
5042
5043#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00005044 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00005045# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005046 __kmp_partition_places( team );
5047# endif
5048#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005049 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005050
5051#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005052 kmp_info_t *master = team->t.t_threads[0];
5053 if( master->th.th_teams_microtask ) {
5054 for( f = 1; f < new_nproc; ++f ) {
5055 // propagate teams construct specific info to workers
5056 kmp_info_t *thr = team->t.t_threads[f];
5057 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5058 thr->th.th_teams_level = master->th.th_teams_level;
5059 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005060 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005061 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005062#endif /* OMP_40_ENABLED */
5063#if KMP_NESTED_HOT_TEAMS
5064 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005065 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005066 for( f = 1; f < new_nproc; ++f ) {
5067 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005068 int b;
5069 kmp_balign_t * balign = thr->th.th_bar;
5070 for( b = 0; b < bs_last_barrier; ++ b ) {
5071 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5072 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005073#if USE_DEBUGGER
5074 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5075#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005076 }
5077 }
5078 }
5079#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005080
5081 /* reallocate space for arguments if necessary */
5082 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005083 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005084 //
5085 // The hot team re-uses the previous task team,
5086 // if untouched during the previous release->gather phase.
5087 //
5088
5089 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5090
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005091#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005092 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005093 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5094 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005095 }
5096#endif
5097
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005098#if OMPT_SUPPORT
5099 __ompt_team_assign_id(team, ompt_parallel_id);
5100#endif
5101
Jim Cownie5e8470a2013-09-27 10:38:44 +00005102 KMP_MB();
5103
5104 return team;
5105 }
5106
5107 /* next, let's try to take one from the team pool */
5108 KMP_MB();
5109 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5110 {
5111 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5112 if ( team->t.t_max_nproc >= max_nproc ) {
5113 /* take this team from the team pool */
5114 __kmp_team_pool = team->t.t_next_pool;
5115
5116 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005117 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005118
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005119 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5120 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5121 team->t.t_task_team[0] = NULL;
5122 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005123
5124 /* reallocate space for arguments if necessary */
5125 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005126 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005127
5128 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5129 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5130 { // Initialize barrier data.
5131 int b;
5132 for ( b = 0; b < bs_last_barrier; ++ b) {
5133 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005134#if USE_DEBUGGER
5135 team->t.t_bar[ b ].b_master_arrived = 0;
5136 team->t.t_bar[ b ].b_team_arrived = 0;
5137#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005138 }
5139 }
5140
5141#if OMP_40_ENABLED
5142 team->t.t_proc_bind = new_proc_bind;
5143#endif
5144
5145 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005146
5147#if OMPT_SUPPORT
5148 __ompt_team_assign_id(team, ompt_parallel_id);
5149#endif
5150
Jim Cownie5e8470a2013-09-27 10:38:44 +00005151 KMP_MB();
5152
5153 return team;
5154 }
5155
5156 /* reap team if it is too small, then loop back and check the next one */
5157        /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
5158        /* TODO: Use a technique to find the right-sized hot team; don't reap them */
5159 team = __kmp_reap_team( team );
5160 __kmp_team_pool = team;
5161 }
5162
5163 /* nothing available in the pool, no matter, make a new team! */
5164 KMP_MB();
5165 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5166
5167 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005168 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005169 /* NOTE well, for some reason allocating one big buffer and dividing it
5170 * up seems to really hurt performance a lot on the P4, so, let's not use
5171 * this... */
5172 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005173
5174 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005175 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005176
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005177 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5178 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5179 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5180 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005181
5182 if ( __kmp_storage_map ) {
5183 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5184 }
5185
5186 /* allocate space for arguments */
5187 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005188 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005189
5190 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5191 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5192 { // Initialize barrier data.
5193 int b;
5194 for ( b = 0; b < bs_last_barrier; ++ b ) {
5195 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005196#if USE_DEBUGGER
5197 team->t.t_bar[ b ].b_master_arrived = 0;
5198 team->t.t_bar[ b ].b_team_arrived = 0;
5199#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005200 }
5201 }
5202
5203#if OMP_40_ENABLED
5204 team->t.t_proc_bind = new_proc_bind;
5205#endif
5206
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005207#if OMPT_SUPPORT
5208 __ompt_team_assign_id(team, ompt_parallel_id);
5209 team->t.ompt_serialized_team_info = NULL;
5210#endif
5211
Jim Cownie5e8470a2013-09-27 10:38:44 +00005212 KMP_MB();
5213
5214 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5215
5216 return team;
5217}
5218
5219/* TODO implement hot-teams at all levels */
5220/* TODO implement lazy thread release on demand (disband request) */
5221
5222/* free the team. return it to the team pool. release all the threads
5223 * associated with it */
5224void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005225__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005226{
5227 int f;
5228 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5229
5230 /* verify state */
5231 KMP_DEBUG_ASSERT( root );
5232 KMP_DEBUG_ASSERT( team );
5233 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5234 KMP_DEBUG_ASSERT( team->t.t_threads );
5235
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005236 int use_hot_team = team == root->r.r_hot_team;
5237#if KMP_NESTED_HOT_TEAMS
5238 int level;
5239 kmp_hot_team_ptr_t *hot_teams;
5240 if( master ) {
5241 level = team->t.t_active_level - 1;
5242 if( master->th.th_teams_microtask ) { // in teams construct?
5243 if( master->th.th_teams_size.nteams > 1 ) {
5244 ++level; // level was not increased in teams construct for team_of_masters
5245 }
5246 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5247 master->th.th_teams_level == team->t.t_level ) {
5248 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5249 } // team->t.t_level will be increased inside parallel
5250 }
5251 hot_teams = master->th.th_hot_teams;
5252 if( level < __kmp_hot_teams_max_level ) {
5253 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5254 use_hot_team = 1;
5255 }
5256 }
5257#endif // KMP_NESTED_HOT_TEAMS
5258
Jim Cownie5e8470a2013-09-27 10:38:44 +00005259 /* team is done working */
5260 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005261 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005262 // Do not reset pointer to parent team to NULL for hot teams.
5263
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005264 /* if we are non-hot team, release our threads */
5265 if( ! use_hot_team ) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005266 if (__kmp_tasking_mode != tskm_immediate_exec) {
5267 // Wait for threads to reach reapable state
5268 for (f = 1; f < team->t.t_nproc; ++f) {
5269 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
Andrey Churbanov435b419d2017-03-21 13:48:52 +00005270 kmp_info_t *th = team->t.t_threads[f];
5271 volatile kmp_uint32 *state = &th->th.th_reap_state;
Andrey Churbanov581490e2017-02-06 18:53:32 +00005272 while (*state != KMP_SAFE_TO_REAP) {
5273#if KMP_OS_WINDOWS
5274 // On Windows a thread can be killed at any time, check this
5275 DWORD ecode;
Andrey Churbanov435b419d2017-03-21 13:48:52 +00005276 if (!__kmp_is_thread_alive(th, &ecode)) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005277 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
Andrey Churbanov435b419d2017-03-21 13:48:52 +00005278 break;
5279 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005280#endif
Andrey Churbanov435b419d2017-03-21 13:48:52 +00005281 // first check if thread is sleeping
5282 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5283 if (fl.is_sleeping())
5284 fl.resume(__kmp_gtid_from_thread(th));
5285 KMP_CPU_PAUSE();
Andrey Churbanov581490e2017-02-06 18:53:32 +00005286 }
5287 }
5288
Jonathan Peyton54127982015-11-04 21:37:48 +00005289 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005290 int tt_idx;
5291 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005292 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5293 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005294 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5295 team->t.t_threads[f]->th.th_task_team = NULL;
5296 }
5297 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005298#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005299 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005300#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005301 team->t.t_task_team[tt_idx] = NULL;
5302 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005303 }
5304 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005305
5306 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005307 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005308 team->t.t_level = 0;
5309 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005310
Jim Cownie5e8470a2013-09-27 10:38:44 +00005311 /* free the worker threads */
5312 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5313 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5314 __kmp_free_thread( team->t.t_threads[ f ] );
5315 team->t.t_threads[ f ] = NULL;
5316 }
5317
Jim Cownie5e8470a2013-09-27 10:38:44 +00005318 /* put the team back in the team pool */
5319 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005320 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005321 __kmp_team_pool = (volatile kmp_team_t*) team;
5322 }
5323
5324 KMP_MB();
5325}
5326
5327
5328/* reap the team. destroy it, reclaim all its resources and free its memory */
5329kmp_team_t *
5330__kmp_reap_team( kmp_team_t *team )
5331{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005332 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005333
5334 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005335 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5336 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5337 KMP_DEBUG_ASSERT( team->t.t_threads );
5338 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005339
5340 /* TODO clean the threads that are a part of this? */
5341
5342 /* free stuff */
5343
5344 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005345 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5346 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005347 __kmp_free( team );
5348
5349 KMP_MB();
5350 return next_pool;
5351}
5352
5353//
5354// Free the thread. Don't reap it, just place it on the pool of available
5355// threads.
5356//
5357// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5358// binding for the affinity mechanism to be useful.
5359//
5360// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5361// However, we want to avoid a potential performance problem by always
5362// scanning through the list to find the correct point at which to insert
5363// the thread (potential N**2 behavior). To do this we keep track of the
5364// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5365// With single-level parallelism, threads will always be added to the tail
5366// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5367// parallelism, all bets are off and we may need to scan through the entire
5368// free list.
5369//
5370// This change also has a potentially large performance benefit, for some
5371// applications. Previously, as threads were freed from the hot team, they
5372// would be placed back on the free list in inverse order. If the hot team
5373// grew back to its original size, then the freed thread would be placed
5374// back on the hot team in reverse order. This could cause bad cache
5375// locality problems on programs where the size of the hot team regularly
5376// grew and shrunk.
5377//
5378// Now, for single-level parallelism, the OMP tid is always == gtid.
5379//
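// For illustration only: a minimal, compiled-out sketch of the sorted-splice
// pattern that __kmp_free_thread() implements below. The helper name is
// hypothetical; the fields it touches (th_info.ds.ds_gtid, th_next_pool) are
// the real ones used by the runtime. The real code additionally caches the
// last insertion link in __kmp_thread_pool_insert_pt to avoid rescanning.
#if 0
static void
__kmp_pool_insert_sorted_sketch( kmp_info_t **pool, kmp_info_t *this_th )
{
    kmp_info_t **scan = pool;
    // Walk the links until the next element has a larger gtid (or we hit the end).
    while ( *scan != NULL &&
            (*scan)->th.th_info.ds.ds_gtid < this_th->th.th_info.ds.ds_gtid ) {
        scan = &( (*scan)->th.th_next_pool );
    }
    // Splice the freed thread in front of the first larger gtid.
    this_th->th.th_next_pool = *scan;
    *scan = this_th;
}
#endif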
5380void
5381__kmp_free_thread( kmp_info_t *this_th )
5382{
5383 int gtid;
5384 kmp_info_t **scan;
5385
5386 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5387 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5388
5389 KMP_DEBUG_ASSERT( this_th );
5390
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005391 // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team).
5392 int b;
5393 kmp_balign_t *balign = this_th->th.th_bar;
5394 for (b=0; b<bs_last_barrier; ++b) {
5395 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5396 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5397 balign[b].bb.team = NULL;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005398 balign[b].bb.leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005399 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005400 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005401
Jim Cownie5e8470a2013-09-27 10:38:44 +00005402 /* put thread back on the free pool */
5403 TCW_PTR(this_th->th.th_team, NULL);
5404 TCW_PTR(this_th->th.th_root, NULL);
5405 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5406
5407 //
5408 // If the __kmp_thread_pool_insert_pt is already past the new insert
5409 // point, then we need to re-scan the entire list.
5410 //
5411 gtid = this_th->th.th_info.ds.ds_gtid;
5412 if ( __kmp_thread_pool_insert_pt != NULL ) {
5413 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5414 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5415 __kmp_thread_pool_insert_pt = NULL;
5416 }
5417 }
5418
5419 //
5420 // Scan down the list to find the place to insert the thread.
5421 // scan is the address of a link in the list, possibly the address of
5422 // __kmp_thread_pool itself.
5423 //
5424    // In the absence of nested parallelism, the for loop will have 0 iterations.
5425 //
5426 if ( __kmp_thread_pool_insert_pt != NULL ) {
5427 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5428 }
5429 else {
5430 scan = (kmp_info_t **)&__kmp_thread_pool;
5431 }
5432 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5433 scan = &( (*scan)->th.th_next_pool ) );
5434
5435 //
5436 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5437 // to its address.
5438 //
5439 TCW_PTR(this_th->th.th_next_pool, *scan);
5440 __kmp_thread_pool_insert_pt = *scan = this_th;
5441 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5442 || ( this_th->th.th_info.ds.ds_gtid
5443 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5444 TCW_4(this_th->th.th_in_pool, TRUE);
5445 __kmp_thread_pool_nth++;
5446
5447 TCW_4(__kmp_nth, __kmp_nth - 1);
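    // Note: __kmp_nth stops counting this thread once it is pooled; __kmp_all_nth
    // is only decremented later, in __kmp_reap_thread(), when the thread is reaped.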
5448
5449#ifdef KMP_ADJUST_BLOCKTIME
5450 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005451 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005452 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5453 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5454 if ( __kmp_nth <= __kmp_avail_proc ) {
5455 __kmp_zero_bt = FALSE;
5456 }
5457 }
5458#endif /* KMP_ADJUST_BLOCKTIME */
5459
5460 KMP_MB();
5461}
5462
Jim Cownie5e8470a2013-09-27 10:38:44 +00005463
Jim Cownie5e8470a2013-09-27 10:38:44 +00005464/* ------------------------------------------------------------------------ */
5465
5466void *
5467__kmp_launch_thread( kmp_info_t *this_thr )
5468{
5469 int gtid = this_thr->th.th_info.ds.ds_gtid;
5470/* void *stack_data;*/
5471 kmp_team_t *(*volatile pteam);
5472
5473 KMP_MB();
5474 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5475
5476 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005477 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005478 }
5479
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005480#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005481 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005482 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5483 this_thr->th.ompt_thread_info.wait_id = 0;
5484 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005485 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005486 __ompt_thread_begin(ompt_thread_worker, gtid);
5487 }
5488 }
5489#endif
5490
Jim Cownie5e8470a2013-09-27 10:38:44 +00005491 /* This is the place where threads wait for work */
5492 while( ! TCR_4(__kmp_global.g.g_done) ) {
5493 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5494 KMP_MB();
5495
5496 /* wait for work to do */
5497 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5498
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005499#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005500 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005501 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5502 }
5503#endif
5504
Jim Cownie5e8470a2013-09-27 10:38:44 +00005505 /* No tid yet since not part of a team */
5506 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5507
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005508#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005509 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005510 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5511 }
5512#endif
5513
Jim Cownie5e8470a2013-09-27 10:38:44 +00005514 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
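        // pteam aliases this thread's th_team pointer, which is (re)assigned when
        // the thread is handed a new team; re-read it after the fork barrier above.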
5515
5516 /* have we been allocated? */
5517 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005518#if OMPT_SUPPORT
5519 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005520 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005521 if (ompt_enabled) {
5522 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005523 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005524 }
5525#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005526 /* we were just woken up, so run our new task */
5527 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5528 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005529 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5530 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005531
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005532 updateHWFPControl (*pteam);
5533
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005534#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005535 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005536 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005537 // Initialize OMPT task id for implicit task.
5538 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005539 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005540 }
5541#endif
5542
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005543 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005544 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5545 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005546 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005547 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005548 KMP_ASSERT( rc );
5549
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005550#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005551 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005552 /* no frame set while outside task */
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005553 task_info->frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005554
5555 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5556 }
5557#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005558 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005559 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5560 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005561 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005562 /* join barrier after parallel region */
5563 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005564#if OMPT_SUPPORT && OMPT_TRACE
5565 if (ompt_enabled) {
5566 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005567 // don't access *pteam here: it may have already been freed
5568 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005569 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5570 my_parallel_id, task_info->task_id);
5571 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005572 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005573 task_info->task_id = 0;
5574 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005575#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005576 }
5577 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005578 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005579
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005580#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005581 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005582 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5583 __ompt_thread_end(ompt_thread_worker, gtid);
5584 }
5585#endif
5586
Jonathan Peyton54127982015-11-04 21:37:48 +00005587 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005588 /* run the destructors for the threadprivate data for this thread */
5589 __kmp_common_destroy_gtid( gtid );
5590
5591 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5592 KMP_MB();
5593 return this_thr;
5594}
5595
5596/* ------------------------------------------------------------------------ */
5597/* ------------------------------------------------------------------------ */
5598
Jim Cownie5e8470a2013-09-27 10:38:44 +00005599void
5600__kmp_internal_end_dest( void *specific_gtid )
5601{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005602 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005603 #pragma warning( push )
5604 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5605 #endif
5606 // Make sure no significant bits are lost
5607 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005608 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005609 #pragma warning( pop )
5610 #endif
5611
5612 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5613    /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
5614 * this is because 0 is reserved for the nothing-stored case */
5615
5616 /* josh: One reason for setting the gtid specific data even when it is being
5617 destroyed by pthread is to allow gtid lookup through thread specific data
5618 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5619 that gets executed in the call to __kmp_internal_end_thread, actually
5620 gets the gtid through the thread specific data. Setting it here seems
5621 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5622 to run smoothly.
5623 todo: get rid of this after we remove the dependence on
5624 __kmp_gtid_get_specific
5625 */
5626 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5627 __kmp_gtid_set_specific( gtid );
5628 #ifdef KMP_TDATA_GTID
5629 __kmp_gtid = gtid;
5630 #endif
5631 __kmp_internal_end_thread( gtid );
5632}
5633
Jonathan Peyton99016992015-05-26 17:32:53 +00005634#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005635
5636// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005637// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005638// option in makefile.mk works fine.
5639
5640__attribute__(( destructor ))
5641void
5642__kmp_internal_end_dtor( void )
5643{
5644 __kmp_internal_end_atexit();
5645}
5646
5647void
5648__kmp_internal_end_fini( void )
5649{
5650 __kmp_internal_end_atexit();
5651}
5652
5653#endif
5654
5655/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5656void
5657__kmp_internal_end_atexit( void )
5658{
5659 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5660 /* [Windows]
5661 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5662 stat code that depends on thread specific data for gtid fails because that data becomes
5663 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5664 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5665 stat code and use __kmp_internal_end_library to cleanly shutdown the library.
5666
5667// TODO: Can some of this comment about GVS be removed?
5668 I suspect that the offending stat code is executed when the calling thread tries to
5669 clean up a dead root thread's data structures, resulting in GVS code trying to close
5670 the GVS structures for that thread, but since the stat code uses
5671 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5672 cleaning up itself instead of another thread, it gets confused. This happens because
5673 allowing a thread to unregister and cleanup another thread is a recent modification for
5674 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5675 thread may end up trying to unregister another thread only if thread death does not
5676 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5677 specific data destructor function to detect thread death. For Windows dynamic, there
5678 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5679 workaround is applicable only for Windows static stat library.
5680 */
5681 __kmp_internal_end_library( -1 );
5682 #if KMP_OS_WINDOWS
5683 __kmp_close_console();
5684 #endif
5685}
5686
5687static void
5688__kmp_reap_thread(
5689 kmp_info_t * thread,
5690 int is_root
5691) {
5692
Alp Toker8f2d3f02014-02-24 10:40:15 +00005693 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005694
5695 int gtid;
5696
5697 KMP_DEBUG_ASSERT( thread != NULL );
5698
5699 gtid = thread->th.th_info.ds.ds_gtid;
5700
5701 if ( ! is_root ) {
5702
5703 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5704 /* Assume the threads are at the fork barrier here */
5705 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5706 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00005707 ANNOTATE_HAPPENS_BEFORE(thread);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005708 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5709 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005710 }; // if
5711
Jim Cownie5e8470a2013-09-27 10:38:44 +00005712 // Terminate OS thread.
5713 __kmp_reap_worker( thread );
5714
5715 //
5716 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005717 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005718 //
5719 // There is a small timing hole here - if the worker thread was
5720        // just waking up after sleeping in the pool and had reset its
5721        // th_active_in_pool flag but not yet decremented the global counter
5722 // __kmp_thread_pool_active_nth yet, then the global counter
5723 // might not get updated.
5724 //
5725 // Currently, this can only happen as the library is unloaded,
5726 // so there are no harmful side effects.
5727 //
5728 if ( thread->th.th_active_in_pool ) {
5729 thread->th.th_active_in_pool = FALSE;
5730 KMP_TEST_THEN_DEC32(
5731 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5732 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5733 }
5734
5735 // Decrement # of [worker] threads in the pool.
5736 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5737 --__kmp_thread_pool_nth;
5738 }; // if
5739
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005740 __kmp_free_implicit_task(thread);
5741
Jim Cownie5e8470a2013-09-27 10:38:44 +00005742 // Free the fast memory for tasking
5743 #if USE_FAST_MEMORY
5744 __kmp_free_fast_memory( thread );
5745 #endif /* USE_FAST_MEMORY */
5746
5747 __kmp_suspend_uninitialize_thread( thread );
5748
5749 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5750 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5751
5752 -- __kmp_all_nth;
5753 // __kmp_nth was decremented when thread is added to the pool.
5754
5755#ifdef KMP_ADJUST_BLOCKTIME
5756 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005757 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005758 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5759 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5760 if ( __kmp_nth <= __kmp_avail_proc ) {
5761 __kmp_zero_bt = FALSE;
5762 }
5763 }
5764#endif /* KMP_ADJUST_BLOCKTIME */
5765
5766 /* free the memory being used */
5767 if( __kmp_env_consistency_check ) {
5768 if ( thread->th.th_cons ) {
5769 __kmp_free_cons_stack( thread->th.th_cons );
5770 thread->th.th_cons = NULL;
5771 }; // if
5772 }
5773
5774 if ( thread->th.th_pri_common != NULL ) {
5775 __kmp_free( thread->th.th_pri_common );
5776 thread->th.th_pri_common = NULL;
5777 }; // if
5778
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005779 if (thread->th.th_task_state_memo_stack != NULL) {
5780 __kmp_free(thread->th.th_task_state_memo_stack);
5781 thread->th.th_task_state_memo_stack = NULL;
5782 }
5783
Jim Cownie5e8470a2013-09-27 10:38:44 +00005784 #if KMP_USE_BGET
5785 if ( thread->th.th_local.bget_data != NULL ) {
5786 __kmp_finalize_bget( thread );
5787 }; // if
5788 #endif
5789
Alp Toker98758b02014-03-02 04:12:06 +00005790#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005791 if ( thread->th.th_affin_mask != NULL ) {
5792 KMP_CPU_FREE( thread->th.th_affin_mask );
5793 thread->th.th_affin_mask = NULL;
5794 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005795#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005796
5797 __kmp_reap_team( thread->th.th_serial_team );
5798 thread->th.th_serial_team = NULL;
5799 __kmp_free( thread );
5800
5801 KMP_MB();
5802
5803} // __kmp_reap_thread
5804
5805static void
5806__kmp_internal_end(void)
5807{
5808 int i;
5809
5810 /* First, unregister the library */
5811 __kmp_unregister_library();
5812
5813 #if KMP_OS_WINDOWS
5814 /* In Win static library, we can't tell when a root actually dies, so we
5815 reclaim the data structures for any root threads that have died but not
5816 unregistered themselves, in order to shut down cleanly.
5817 In Win dynamic library we also can't tell when a thread dies.
5818 */
5819 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5820 #endif
5821
5822 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5823 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005824 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005825 break;
5826 KMP_MB(); /* Flush all pending memory write invalidates. */
5827 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5828
5829 if ( i < __kmp_threads_capacity ) {
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005830#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005831 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5832 KMP_MB(); /* Flush all pending memory write invalidates. */
5833
5834 //
5835 // Need to check that monitor was initialized before reaping it.
5836        // If we are called from __kmp_atfork_child (which sets
5837 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5838 // contain valid data, but it is only valid in the parent process,
5839 // not the child.
5840 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005841 // New behavior (201008): instead of keying off of the flag
5842 // __kmp_init_parallel, the monitor thread creation is keyed off
5843 // of the new flag __kmp_init_monitor.
5844 //
5845 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5846 if ( TCR_4( __kmp_init_monitor ) ) {
5847 __kmp_reap_monitor( & __kmp_monitor );
5848 TCW_4( __kmp_init_monitor, 0 );
5849 }
5850 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5851 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005852#endif // KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005853 } else {
5854 /* TODO move this to cleanup code */
5855 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005856 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005857 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5858 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005859// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005860 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005861 }
5862 }
5863 #endif
5864
5865 KMP_MB();
5866
5867 // Reap the worker threads.
5868 // This is valid for now, but be careful if threads are reaped sooner.
5869 while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool.
5870 // Get the next thread from the pool.
5871 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5872 __kmp_thread_pool = thread->th.th_next_pool;
5873 // Reap it.
Andrey Churbanov581490e2017-02-06 18:53:32 +00005874 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005875 thread->th.th_next_pool = NULL;
5876 thread->th.th_in_pool = FALSE;
5877 __kmp_reap_thread( thread, 0 );
5878 }; // while
5879 __kmp_thread_pool_insert_pt = NULL;
5880
5881 // Reap teams.
5882 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
5883 // Get the next team from the pool.
5884 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5885 __kmp_team_pool = team->t.t_next_pool;
5886 // Reap it.
5887 team->t.t_next_pool = NULL;
5888 __kmp_reap_team( team );
5889 }; // while
5890
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005891 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005892
5893 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5894 // TBD: Add some checking...
5895 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5896 }
5897
5898 /* Make sure all threadprivate destructors get run by joining with all worker
5899 threads before resetting this flag */
5900 TCW_SYNC_4(__kmp_init_common, FALSE);
5901
5902 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5903 KMP_MB();
5904
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005905#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005906 //
5907 // See note above: One of the possible fixes for CQ138434 / CQ140126
5908 //
5909 // FIXME: push both code fragments down and CSE them?
5910 // push them into __kmp_cleanup() ?
5911 //
5912 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5913 if ( TCR_4( __kmp_init_monitor ) ) {
5914 __kmp_reap_monitor( & __kmp_monitor );
5915 TCW_4( __kmp_init_monitor, 0 );
5916 }
5917 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5918 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005919#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005920 } /* else !__kmp_global.t_active */
5921 TCW_4(__kmp_init_gtid, FALSE);
5922 KMP_MB(); /* Flush all pending memory write invalidates. */
5923
Jim Cownie5e8470a2013-09-27 10:38:44 +00005924 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005925#if OMPT_SUPPORT
5926 ompt_fini();
5927#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005928}
5929
5930void
5931__kmp_internal_end_library( int gtid_req )
5932{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005933 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5934 /* this shouldn't be a race condition because __kmp_internal_end() is the
5935 * only place to clear __kmp_serial_init */
5936 /* we'll check this later too, after we get the lock */
5937 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundaant,
5938 // because the next check will work in any case.
5939 if( __kmp_global.g.g_abort ) {
5940 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5941 /* TODO abort? */
5942 return;
5943 }
5944 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5945 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5946 return;
5947 }
5948
5949
5950 KMP_MB(); /* Flush all pending memory write invalidates. */
5951
5952 /* find out who we are and what we should do */
5953 {
5954 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5955 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5956 if( gtid == KMP_GTID_SHUTDOWN ) {
5957 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5958 return;
5959 } else if( gtid == KMP_GTID_MONITOR ) {
5960 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5961 return;
5962 } else if( gtid == KMP_GTID_DNE ) {
5963 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5964 /* we don't know who we are, but we may still shutdown the library */
5965 } else if( KMP_UBER_GTID( gtid )) {
5966 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005967 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005968 __kmp_global.g.g_abort = -1;
5969 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5970 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5971 return;
5972 } else {
5973 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5974 __kmp_unregister_root_current_thread( gtid );
5975 }
5976 } else {
5977 /* worker threads may call this function through the atexit handler, if they call exit() */
5978 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5979 TODO: do a thorough shutdown instead
5980 */
5981 #ifdef DUMP_DEBUG_ON_EXIT
5982 if ( __kmp_debug_buf )
5983 __kmp_dump_debug_buffer( );
5984 #endif
5985 return;
5986 }
5987 }
5988 /* synchronize the termination process */
5989 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5990
5991 /* have we already finished */
5992 if( __kmp_global.g.g_abort ) {
5993 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5994 /* TODO abort? */
5995 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5996 return;
5997 }
5998 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5999 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6000 return;
6001 }
6002
6003 /* We need this lock to enforce mutex between this reading of
6004 __kmp_threads_capacity and the writing by __kmp_register_root.
6005 Alternatively, we can use a counter of roots that is
6006 atomically updated by __kmp_get_global_thread_id_reg,
6007 __kmp_do_serial_initialize and __kmp_internal_end_*.
6008 */
6009 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6010
6011 /* now we can safely conduct the actual termination */
6012 __kmp_internal_end();
6013
6014 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6015 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6016
6017 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
6018
6019 #ifdef DUMP_DEBUG_ON_EXIT
6020 if ( __kmp_debug_buf )
6021 __kmp_dump_debug_buffer();
6022 #endif
6023
6024 #if KMP_OS_WINDOWS
6025 __kmp_close_console();
6026 #endif
6027
6028 __kmp_fini_allocator();
6029
6030} // __kmp_internal_end_library
6031
6032void
6033__kmp_internal_end_thread( int gtid_req )
6034{
6035 int i;
6036
6037 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6038 /* this shouldn't be a race condition because __kmp_internal_end() is the
6039 * only place to clear __kmp_serial_init */
6040 /* we'll check this later too, after we get the lock */
6041 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
6042 // because the next check will work in any case.
6043 if( __kmp_global.g.g_abort ) {
6044 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
6045 /* TODO abort? */
6046 return;
6047 }
6048 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6049 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
6050 return;
6051 }
6052
6053 KMP_MB(); /* Flush all pending memory write invalidates. */
6054
6055 /* find out who we are and what we should do */
6056 {
6057 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6058 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6059 if( gtid == KMP_GTID_SHUTDOWN ) {
6060 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6061 return;
6062 } else if( gtid == KMP_GTID_MONITOR ) {
6063 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6064 return;
6065 } else if( gtid == KMP_GTID_DNE ) {
6066 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6067 return;
6068 /* we don't know who we are */
6069 } else if( KMP_UBER_GTID( gtid )) {
6070 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006071 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006072 __kmp_global.g.g_abort = -1;
6073 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6074 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6075 return;
6076 } else {
6077 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6078 __kmp_unregister_root_current_thread( gtid );
6079 }
6080 } else {
6081 /* just a worker thread, let's leave */
6082 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6083
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006084 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006085 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006086 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006087
6088 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6089 return;
6090 }
6091 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006092 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006093    // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber thread,
6094    // because it is better to shut down later, in the library destructor.
6095    // The reason for this change is a performance problem when a non-OpenMP thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006096    // in a loop forks and joins many OpenMP threads. We can save a lot of time
6097 // keeping worker threads alive until the program shutdown.
6098 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6099 // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006100 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006101 return;
6102 #endif
6103 /* synchronize the termination process */
6104 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6105
6106 /* have we already finished */
6107 if( __kmp_global.g.g_abort ) {
6108 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6109 /* TODO abort? */
6110 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6111 return;
6112 }
6113 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6114 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6115 return;
6116 }
6117
6118 /* We need this lock to enforce mutex between this reading of
6119 __kmp_threads_capacity and the writing by __kmp_register_root.
6120 Alternatively, we can use a counter of roots that is
6121 atomically updated by __kmp_get_global_thread_id_reg,
6122 __kmp_do_serial_initialize and __kmp_internal_end_*.
6123 */
6124
6125 /* should we finish the run-time? are all siblings done? */
6126 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6127
6128 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6129 if ( KMP_UBER_GTID( i ) ) {
6130 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6131 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6132 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6133 return;
6134 };
6135 }
6136
6137 /* now we can safely conduct the actual termination */
6138
6139 __kmp_internal_end();
6140
6141 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6142 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6143
Jim Cownie77c2a632014-09-03 11:34:33 +00006144 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006145
6146 #ifdef DUMP_DEBUG_ON_EXIT
6147 if ( __kmp_debug_buf )
6148 __kmp_dump_debug_buffer();
6149 #endif
6150} // __kmp_internal_end_thread
6151
6152// -------------------------------------------------------------------------------------------------
6153// Library registration stuff.
6154
6155static long __kmp_registration_flag = 0;
6156 // Random value used to indicate library initialization.
6157static char * __kmp_registration_str = NULL;
6158 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6159
6160
6161static inline
6162char *
6163__kmp_reg_status_name() {
6164 /*
6165        On RHEL 3u5, if linked statically, getpid() returns a different value in each thread.
6166        If registration and unregistration happen in different threads (omp_misc_other_root_exit.cpp test case),
6167        the registered-lib environment variable cannot be found, because its name will contain a different pid.
6168 */
6169 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6170} // __kmp_reg_status_get
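// For illustration only (the pid is hypothetical): with pid 12345 the function
// returns "__KMP_REGISTERED_LIB_12345". Embedding the pid keeps concurrently
// running processes from colliding on the same environment variable.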
6171
6172
6173void
6174__kmp_register_library_startup(
6175 void
6176) {
6177
6178 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6179 int done = 0;
6180 union {
6181 double dtime;
6182 long ltime;
6183 } time;
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006184 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie5e8470a2013-09-27 10:38:44 +00006185 __kmp_initialize_system_tick();
6186 #endif
6187 __kmp_read_system_time( & time.dtime );
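    // The flag below mixes a fixed 0xCAFE0000 tag with the low 16 bits of the
    // just-read time value (reinterpreted through the union), giving each
    // loaded library instance a quasi-unique, recognizable marker.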
6188 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6189 __kmp_registration_str =
6190 __kmp_str_format(
6191 "%p-%lx-%s",
6192 & __kmp_registration_flag,
6193 __kmp_registration_flag,
6194 KMP_LIBRARY_FILE
6195 );
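    // For illustration only (addresses/values are made up): the string has the
    // form "<flag address>-<flag value>-<library file>", e.g.
    //     0x7f0a2c001234-cafe5678-libomp.so
    // The checking code below splits it back apart on '-' to decide whether a
    // previously registered copy of the library is still alive.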
6196
6197 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6198
6199 while ( ! done ) {
6200
6201 char * value = NULL; // Actual value of the environment variable.
6202
6203        // Set the environment variable, but do not overwrite it if it already exists.
6204 __kmp_env_set( name, __kmp_registration_str, 0 );
6205        // Check that the variable was written.
6206 value = __kmp_env_get( name );
6207 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6208
6209 done = 1; // Ok, environment variable set successfully, exit the loop.
6210
6211 } else {
6212
6213 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6214            // Check whether it is alive or dead.
6215 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6216 char * tail = value;
6217 char * flag_addr_str = NULL;
6218 char * flag_val_str = NULL;
6219 char const * file_name = NULL;
6220 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6221 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6222 file_name = tail;
6223 if ( tail != NULL ) {
6224 long * flag_addr = 0;
6225 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006226 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6227 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006228 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6229 // First, check whether environment-encoded address is mapped into addr space.
6230 // If so, dereference it to see if it still has the right value.
6231
6232 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6233 neighbor = 1;
6234 } else {
6235 // If not, then we know the other copy of the library is no longer running.
6236 neighbor = 2;
6237 }; // if
6238 }; // if
6239 }; // if
6240 switch ( neighbor ) {
6241 case 0 : // Cannot parse environment variable -- neighbor status unknown.
 6242                    // Assume it is an incompatible format from a future version of the library.
6243 // Assume the other library is alive.
6244 // WARN( ... ); // TODO: Issue a warning.
6245 file_name = "unknown library";
 6246                    // Attention! Falling through to the next case is intentional.
6247 case 1 : { // Neighbor is alive.
 6248                    // Check whether running a duplicate library is allowed.
6249 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6250 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6251 // That's not allowed. Issue fatal error.
6252 __kmp_msg(
6253 kmp_ms_fatal,
6254 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6255 KMP_HNT( DuplicateLibrary ),
6256 __kmp_msg_null
6257 );
6258 }; // if
6259 KMP_INTERNAL_FREE( duplicate_ok );
6260 __kmp_duplicate_library_ok = 1;
6261 done = 1; // Exit the loop.
6262 } break;
6263 case 2 : { // Neighbor is dead.
6264 // Clear the variable and try to register library again.
6265 __kmp_env_unset( name );
6266 } break;
6267 default : {
6268 KMP_DEBUG_ASSERT( 0 );
6269 } break;
6270 }; // switch
6271
6272 }; // if
6273 KMP_INTERNAL_FREE( (void *) value );
6274
6275 }; // while
6276 KMP_INTERNAL_FREE( (void *) name );
6277
6278} // func __kmp_register_library_startup
6279
6280
6281void
6282__kmp_unregister_library( void ) {
6283
6284 char * name = __kmp_reg_status_name();
6285 char * value = __kmp_env_get( name );
6286
6287 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6288 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6289 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6290 // Ok, this is our variable. Delete it.
6291 __kmp_env_unset( name );
6292 }; // if
6293
6294 KMP_INTERNAL_FREE( __kmp_registration_str );
6295 KMP_INTERNAL_FREE( value );
6296 KMP_INTERNAL_FREE( name );
6297
6298 __kmp_registration_flag = 0;
6299 __kmp_registration_str = NULL;
6300
6301} // __kmp_unregister_library
6302
6303
6304// End of Library registration stuff.
6305// -------------------------------------------------------------------------------------------------
6306
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006307#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6308
6309static void __kmp_check_mic_type()
6310{
6311 kmp_cpuid_t cpuid_state = {0};
6312 kmp_cpuid_t * cs_p = &cpuid_state;
Jonathan Peyton7be075332015-06-22 15:53:50 +00006313 __kmp_x86_cpuid(1, 0, cs_p);
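    // CPUID leaf 1 reports the processor signature in EAX (stepping/model/family plus the
    // extended model/family fields); the masks below compare the model/family bits against
    // the Xeon Phi signatures (KNC -> mic2, KNL -> mic3).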
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006314 // We don't support mic1 at the moment
6315 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6316 __kmp_mic_type = mic2;
6317 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6318 __kmp_mic_type = mic3;
6319 } else {
6320 __kmp_mic_type = non_mic;
6321 }
6322}
6323
6324#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
6325
Jim Cownie5e8470a2013-09-27 10:38:44 +00006326static void
6327__kmp_do_serial_initialize( void )
6328{
6329 int i, gtid;
6330 int size;
6331
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006332 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006333
6334 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6335 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6336 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6337 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6338 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6339
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006340#if OMPT_SUPPORT
6341 ompt_pre_init();
6342#endif
6343
Jim Cownie5e8470a2013-09-27 10:38:44 +00006344 __kmp_validate_locks();
6345
6346 /* Initialize internal memory allocator */
6347 __kmp_init_allocator();
6348
6349 /* Register the library startup via an environment variable
6350 and check to see whether another copy of the library is already
6351 registered. */
6352
6353 __kmp_register_library_startup( );
6354
6355 /* TODO reinitialization of library */
6356 if( TCR_4(__kmp_global.g.g_done) ) {
6357 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6358 }
6359
6360 __kmp_global.g.g_abort = 0;
6361 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6362
6363 /* initialize the locks */
6364#if KMP_USE_ADAPTIVE_LOCKS
6365#if KMP_DEBUG_ADAPTIVE_LOCKS
6366 __kmp_init_speculative_stats();
6367#endif
6368#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006369#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00006370 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006371#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006372 __kmp_init_lock( & __kmp_global_lock );
6373 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6374 __kmp_init_lock( & __kmp_debug_lock );
6375 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6376 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6377 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6378 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6379 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6380 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6381 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6382 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6383 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6384 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6385 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6386 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6387 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6388 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6389 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006390#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006391 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006392#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006393 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6394
6395 /* conduct initialization and initial setup of configuration */
6396
6397 __kmp_runtime_initialize();
6398
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006399#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6400 __kmp_check_mic_type();
6401#endif
6402
Jim Cownie5e8470a2013-09-27 10:38:44 +00006403 // Some global variable initialization moved here from kmp_env_initialize()
6404#ifdef KMP_DEBUG
6405 kmp_diag = 0;
6406#endif
6407 __kmp_abort_delay = 0;
6408
6409 // From __kmp_init_dflt_team_nth()
6410 /* assume the entire machine will be used */
6411 __kmp_dflt_team_nth_ub = __kmp_xproc;
6412 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6413 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6414 }
6415 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6416 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6417 }
6418 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006419
6420 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6421 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006422#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006423 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6424 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006425#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006426 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6427 __kmp_library = library_throughput;
6428 // From KMP_SCHEDULE initialization
6429 __kmp_static = kmp_sch_static_balanced;
6430 // AC: do not use analytical here, because it is non-monotonous
6431 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006432    //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006433    // Barrier initialization. Moved here from the barrier branch-bit control and
 6434    // barrier-method control parts of __kmp_env_initialize().
6435 #if KMP_FAST_REDUCTION_BARRIER
6436 #define kmp_reduction_barrier_gather_bb ((int)1)
6437 #define kmp_reduction_barrier_release_bb ((int)1)
6438 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6439 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6440 #endif // KMP_FAST_REDUCTION_BARRIER
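    // For each barrier type, the branch bits give (the log2 of) the barrier tree fan-out and
    // the pattern selects the barrier algorithm; both are seeded from the built-in *_dflt
    // globals here and may be overridden later by environment settings.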
6441 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6442 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6443 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6444 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6445 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6446 #if KMP_FAST_REDUCTION_BARRIER
6447 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6448 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6449 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6450 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6451 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6452 }
6453 #endif // KMP_FAST_REDUCTION_BARRIER
6454 }
6455 #if KMP_FAST_REDUCTION_BARRIER
6456 #undef kmp_reduction_barrier_release_pat
6457 #undef kmp_reduction_barrier_gather_pat
6458 #undef kmp_reduction_barrier_release_bb
6459 #undef kmp_reduction_barrier_gather_bb
6460 #endif // KMP_FAST_REDUCTION_BARRIER
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006461#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006462 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006463 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006464 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006465 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6466 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6467 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6468 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006469#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006470 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006471 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6472 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6473 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006474#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006475#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006476
6477 // From KMP_CHECKS initialization
6478#ifdef KMP_DEBUG
6479 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6480#else
6481 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6482#endif
6483
6484 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6485 __kmp_foreign_tp = TRUE;
6486
6487 __kmp_global.g.g_dynamic = FALSE;
6488 __kmp_global.g.g_dynamic_mode = dynamic_default;
6489
6490 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006491
Jim Cownie5e8470a2013-09-27 10:38:44 +00006492 // Print all messages in message catalog for testing purposes.
6493 #ifdef KMP_DEBUG
6494 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6495 if ( __kmp_str_match_true( val ) ) {
6496 kmp_str_buf_t buffer;
6497 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006498 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006499 __kmp_printf( "%s", buffer.str );
6500 __kmp_str_buf_free( & buffer );
6501 }; // if
6502 __kmp_env_free( & val );
6503 #endif
6504
Jim Cownie181b4bb2013-12-23 17:28:57 +00006505 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006506 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6507 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6508
Jim Cownie5e8470a2013-09-27 10:38:44 +00006509 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6510 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6511 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6512 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6513 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6514 __kmp_thread_pool = NULL;
6515 __kmp_thread_pool_insert_pt = NULL;
6516 __kmp_team_pool = NULL;
6517
6518 /* Allocate all of the variable sized records */
6519 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6520 /* Since allocation is cache-aligned, just add extra padding at the end */
6521 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6522 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6523 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6524
6525 /* init thread counts */
6526 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
6527 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
6528 __kmp_all_nth = 0;
6529 __kmp_nth = 0;
6530
6531 /* setup the uber master thread and hierarchy */
6532 gtid = __kmp_register_root( TRUE );
6533 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6534 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6535 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6536
6537 KMP_MB(); /* Flush all pending memory write invalidates. */
6538
6539 __kmp_common_initialize();
6540
6541 #if KMP_OS_UNIX
6542 /* invoke the child fork handler */
6543 __kmp_register_atfork();
6544 #endif
6545
Jonathan Peyton99016992015-05-26 17:32:53 +00006546 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006547 {
 6548        /* Invoke the exit handler when the program finishes, but only for the static library.
 6549           For the dynamic library, we already have _fini and DllMain.
 6550        */
6551 int rc = atexit( __kmp_internal_end_atexit );
6552 if ( rc != 0 ) {
6553 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6554 }; // if
6555 }
6556 #endif
6557
6558 #if KMP_HANDLE_SIGNALS
6559 #if KMP_OS_UNIX
6560 /* NOTE: make sure that this is called before the user installs
6561 * their own signal handlers so that the user handlers
6562 * are called first. this way they can return false,
6563 * not call our handler, avoid terminating the library,
6564 * and continue execution where they left off. */
6565 __kmp_install_signals( FALSE );
6566 #endif /* KMP_OS_UNIX */
6567 #if KMP_OS_WINDOWS
6568 __kmp_install_signals( TRUE );
6569 #endif /* KMP_OS_WINDOWS */
6570 #endif
6571
6572 /* we have finished the serial initialization */
6573 __kmp_init_counter ++;
6574
6575 __kmp_init_serial = TRUE;
6576
6577 if (__kmp_settings) {
6578 __kmp_env_print();
6579 }
6580
6581#if OMP_40_ENABLED
6582 if (__kmp_display_env || __kmp_display_env_verbose) {
6583 __kmp_env_print_2();
6584 }
6585#endif // OMP_40_ENABLED
6586
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006587#if OMPT_SUPPORT
6588 ompt_post_init();
6589#endif
6590
Jim Cownie5e8470a2013-09-27 10:38:44 +00006591 KMP_MB();
6592
6593 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6594}
6595
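// Public entry point. Uses a double-checked pattern (test __kmp_init_serial, take
// __kmp_initz_lock, re-test) so that concurrent first calls initialize the library exactly
// once; __kmp_middle_initialize and __kmp_parallel_initialize below follow the same scheme.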
6596void
6597__kmp_serial_initialize( void )
6598{
6599 if ( __kmp_init_serial ) {
6600 return;
6601 }
6602 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6603 if ( __kmp_init_serial ) {
6604 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6605 return;
6606 }
6607 __kmp_do_serial_initialize();
6608 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6609}
6610
6611static void
6612__kmp_do_middle_initialize( void )
6613{
6614 int i, j;
6615 int prev_dflt_team_nth;
6616
6617 if( !__kmp_init_serial ) {
6618 __kmp_do_serial_initialize();
6619 }
6620
6621 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6622
6623 //
6624 // Save the previous value for the __kmp_dflt_team_nth so that
6625 // we can avoid some reinitialization if it hasn't changed.
6626 //
6627 prev_dflt_team_nth = __kmp_dflt_team_nth;
6628
Alp Toker98758b02014-03-02 04:12:06 +00006629#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006630 //
6631 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6632 // number of cores on the machine.
6633 //
6634 __kmp_affinity_initialize();
6635
6636 //
6637 // Run through the __kmp_threads array and set the affinity mask
6638 // for each root thread that is currently registered with the RTL.
6639 //
6640 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6641 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6642 __kmp_affinity_set_init_mask( i, TRUE );
6643 }
6644 }
Alp Toker98758b02014-03-02 04:12:06 +00006645#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006646
6647 KMP_ASSERT( __kmp_xproc > 0 );
6648 if ( __kmp_avail_proc == 0 ) {
6649 __kmp_avail_proc = __kmp_xproc;
6650 }
6651
6652 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
6653 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006654 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006655 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6656 j++;
6657 }
6658
6659 if ( __kmp_dflt_team_nth == 0 ) {
6660#ifdef KMP_DFLT_NTH_CORES
6661 //
6662 // Default #threads = #cores
6663 //
6664 __kmp_dflt_team_nth = __kmp_ncores;
6665 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6666 __kmp_dflt_team_nth ) );
6667#else
6668 //
6669 // Default #threads = #available OS procs
6670 //
6671 __kmp_dflt_team_nth = __kmp_avail_proc;
6672 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6673 __kmp_dflt_team_nth ) );
6674#endif /* KMP_DFLT_NTH_CORES */
6675 }
6676
6677 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6678 __kmp_dflt_team_nth = KMP_MIN_NTH;
6679 }
6680 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6681 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6682 }
6683
6684 //
6685 // There's no harm in continuing if the following check fails,
6686 // but it indicates an error in the previous logic.
6687 //
6688 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6689
6690 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6691 //
6692 // Run through the __kmp_threads array and set the num threads icv
6693 // for each root thread that is currently registered with the RTL
6694 // (which has not already explicitly set its nthreads-var with a
6695 // call to omp_set_num_threads()).
6696 //
6697 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6698 kmp_info_t *thread = __kmp_threads[ i ];
6699 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006700 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006701
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006702 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006703 }
6704 }
6705 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6706 __kmp_dflt_team_nth) );
6707
6708#ifdef KMP_ADJUST_BLOCKTIME
6709 /* Adjust blocktime to zero if necessary */
6710 /* now that __kmp_avail_proc is set */
6711 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6712 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6713 if ( __kmp_nth > __kmp_avail_proc ) {
6714 __kmp_zero_bt = TRUE;
6715 }
6716 }
6717#endif /* KMP_ADJUST_BLOCKTIME */
6718
6719 /* we have finished middle initialization */
6720 TCW_SYNC_4(__kmp_init_middle, TRUE);
6721
6722 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6723}
6724
6725void
6726__kmp_middle_initialize( void )
6727{
6728 if ( __kmp_init_middle ) {
6729 return;
6730 }
6731 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6732 if ( __kmp_init_middle ) {
6733 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6734 return;
6735 }
6736 __kmp_do_middle_initialize();
6737 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6738}
6739
6740void
6741__kmp_parallel_initialize( void )
6742{
6743 int gtid = __kmp_entry_gtid(); // this might be a new root
6744
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006745 /* synchronize parallel initialization (for sibling) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006746 if( TCR_4(__kmp_init_parallel) ) return;
6747 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6748 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6749
6750 /* TODO reinitialization after we have already shut down */
6751 if( TCR_4(__kmp_global.g.g_done) ) {
6752 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6753 __kmp_infinite_loop();
6754 }
6755
6756 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6757 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6758 */
6759 if( !__kmp_init_middle ) {
6760 __kmp_do_middle_initialize();
6761 }
6762
6763 /* begin initialization */
6764 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6765 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6766
6767#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6768 //
6769 // Save the FP control regs.
6770 // Worker threads will set theirs to these values at thread startup.
6771 //
6772 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6773 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6774 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6775#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6776
6777#if KMP_OS_UNIX
6778# if KMP_HANDLE_SIGNALS
6779 /* must be after __kmp_serial_initialize */
6780 __kmp_install_signals( TRUE );
6781# endif
6782#endif
6783
6784 __kmp_suspend_initialize();
6785
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006786#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006787 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6788 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6789 }
6790#else
6791 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6792 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6793 }
6794#endif
6795
6796 if ( __kmp_version ) {
6797 __kmp_print_version_2();
6798 }
6799
Jim Cownie5e8470a2013-09-27 10:38:44 +00006800 /* we have finished parallel initialization */
6801 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6802
6803 KMP_MB();
6804 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6805
6806 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6807}
6808
6809
6810/* ------------------------------------------------------------------------ */
6811
6812void
6813__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6814 kmp_team_t *team )
6815{
6816 kmp_disp_t *dispatch;
6817
6818 KMP_MB();
6819
6820 /* none of the threads have encountered any constructs, yet. */
6821 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006822#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006823 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006824#endif /* KMP_CACHE_MANAGE */
6825 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6826 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006827 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6828 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006829
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006830 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006831#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00006832 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6833#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006834 if( __kmp_env_consistency_check )
6835 __kmp_push_parallel( gtid, team->t.t_ident );
6836
6837 KMP_MB(); /* Flush all pending memory write invalidates. */
6838}
6839
6840void
6841__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6842 kmp_team_t *team )
6843{
6844 if( __kmp_env_consistency_check )
6845 __kmp_pop_parallel( gtid, team->t.t_ident );
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006846
6847 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006848}
6849
6850int
6851__kmp_invoke_task_func( int gtid )
6852{
6853 int rc;
6854 int tid = __kmp_tid_from_gtid( gtid );
6855 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006856 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006857
6858 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6859#if USE_ITT_BUILD
6860 if ( __itt_stack_caller_create_ptr ) {
6861 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6862 }
6863#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006864#if INCLUDE_SSC_MARKS
6865 SSC_MARK_INVOKING();
6866#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006867
6868#if OMPT_SUPPORT
6869 void *dummy;
6870 void **exit_runtime_p;
6871 ompt_task_id_t my_task_id;
6872 ompt_parallel_id_t my_parallel_id;
6873
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006874 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006875 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6876 ompt_task_info.frame.exit_runtime_frame);
6877 } else {
6878 exit_runtime_p = &dummy;
6879 }
6880
6881#if OMPT_TRACE
6882 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6883 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006884 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006885 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6886 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6887 my_parallel_id, my_task_id);
6888 }
6889#endif
6890#endif
6891
Jonathan Peyton45be4502015-08-11 21:36:41 +00006892 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006893 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6894 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006895 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6896 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006897#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006898 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006899#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006900 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006901#if OMPT_SUPPORT
6902 *exit_runtime_p = NULL;
6903#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006904 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006905
Jim Cownie5e8470a2013-09-27 10:38:44 +00006906#if USE_ITT_BUILD
6907 if ( __itt_stack_caller_create_ptr ) {
6908 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6909 }
6910#endif /* USE_ITT_BUILD */
6911 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6912
6913 return rc;
6914}
6915
6916#if OMP_40_ENABLED
6917void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006918__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006919{
 6920    // This routine is called by all master threads in the teams construct.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006921 kmp_info_t *thr = __kmp_threads[ gtid ];
6922 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006923 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006924 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6925 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6926 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006927 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006928 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006929    // Launch the league of teams now, but do not let the workers execute yet
 6930    // (they hang on the fork barrier until the next parallel region)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006931#if INCLUDE_SSC_MARKS
6932 SSC_MARK_FORKING();
6933#endif
6934 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006935 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006936#if OMPT_SUPPORT
6937 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6938#endif
6939 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006940 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6941 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006942#if INCLUDE_SSC_MARKS
6943 SSC_MARK_JOINING();
6944#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006945
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006946 // AC: last parameter "1" eliminates join barrier which won't work because
6947 // worker threads are in a fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006948 __kmp_join_call( loc, gtid
6949#if OMPT_SUPPORT
6950 , fork_context_intel
6951#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006952 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006953}
6954
6955int
6956__kmp_invoke_teams_master( int gtid )
6957{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006958 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6959 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006960 #if KMP_DEBUG
6961 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6962 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6963 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006964 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6965 __kmp_teams_master( gtid );
6966 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006967 return 1;
6968}
6969#endif /* OMP_40_ENABLED */
6970
6971/* this sets the requested number of threads for the next parallel region
6972 * encountered by this team */
6973/* since this should be enclosed in the forkjoin critical section it
 6974 * should avoid race conditions with asymmetrical nested parallelism */
6975
6976void
6977__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6978{
6979 kmp_info_t *thr = __kmp_threads[gtid];
6980
6981 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006982 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006983}
6984
6985#if OMP_40_ENABLED
6986
6987/* this sets the requested number of teams for the teams region and/or
6988 * the number of threads for the next parallel region encountered */
6989void
6990__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6991{
6992 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006993 KMP_DEBUG_ASSERT(num_teams >= 0);
6994 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006995
6996 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006997 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006998 if( num_teams > __kmp_max_nth ) { // if too many teams requested?
6999 if ( !__kmp_reserve_warn ) {
7000 __kmp_reserve_warn = 1;
7001 __kmp_msg(
7002 kmp_ms_warning,
7003 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
7004 KMP_HNT( Unset_ALL_THREADS ),
7005 __kmp_msg_null
7006 );
7007 }
7008 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007009 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007010 // Set number of teams (number of threads in the outer "parallel" of the teams)
7011 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7012
7013 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007014 if( num_threads == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007015 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007016 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007017 num_threads = __kmp_avail_proc / num_teams;
7018 if( num_teams * num_threads > __kmp_max_nth ) {
7019 // adjust num_threads w/o warning as it is not user setting
7020 num_threads = __kmp_max_nth / num_teams;
7021 }
7022 } else {
7023 if( num_teams * num_threads > __kmp_max_nth ) {
7024 int new_threads = __kmp_max_nth / num_teams;
7025 if ( !__kmp_reserve_warn ) { // user asked for too many threads
7026 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
7027 __kmp_msg(
7028 kmp_ms_warning,
7029 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
7030 KMP_HNT( Unset_ALL_THREADS ),
7031 __kmp_msg_null
7032 );
7033 }
7034 num_threads = new_threads;
7035 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007036 }
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007037 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007038}
7039
7040
7041//
7042// Set the proc_bind var to use in the following parallel region.
7043//
7044void
7045__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
7046{
7047 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007048 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007049}
7050
7051#endif /* OMP_40_ENABLED */
7052
7053/* Launch the worker threads into the microtask. */
7054
7055void
7056__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7057{
7058 kmp_info_t *this_thr = __kmp_threads[gtid];
7059
7060#ifdef KMP_DEBUG
7061 int f;
7062#endif /* KMP_DEBUG */
7063
7064 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007065 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007066 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7067 KMP_MB(); /* Flush all pending memory write invalidates. */
7068
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007069 team->t.t_construct = 0; /* no single directives seen yet */
7070 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007071
7072 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007073 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007074 if ( team->t.t_max_nproc > 1 ) {
7075 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00007076 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007077 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007078#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007079 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7080#endif
7081 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007082 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007083 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007084#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007085 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7086#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007087 }
7088
7089 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007090 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007091
7092#ifdef KMP_DEBUG
7093 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7094 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7095 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7096 }
7097#endif /* KMP_DEBUG */
7098
7099 /* release the worker threads so they may begin working */
7100 __kmp_fork_barrier( gtid, 0 );
7101}
7102
7103
7104void
7105__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7106{
7107 kmp_info_t *this_thr = __kmp_threads[gtid];
7108
7109 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007110 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007111 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7112 KMP_MB(); /* Flush all pending memory write invalidates. */
7113
7114 /* Join barrier after fork */
7115
7116#ifdef KMP_DEBUG
7117 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7118 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7119 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7120 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7121 __kmp_print_structure();
7122 }
7123 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7124 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7125#endif /* KMP_DEBUG */
7126
7127 __kmp_join_barrier( gtid ); /* wait for everyone */
7128
7129 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007130 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007131}
7132
7133
7134/* ------------------------------------------------------------------------ */
7135/* ------------------------------------------------------------------------ */
7136
7137#ifdef USE_LOAD_BALANCE
7138
7139//
 7140// Return the number of worker threads actively spinning in the hot team, if we
7141// are at the outermost level of parallelism. Otherwise, return 0.
7142//
7143static int
7144__kmp_active_hot_team_nproc( kmp_root_t *root )
7145{
7146 int i;
7147 int retval;
7148 kmp_team_t *hot_team;
7149
7150 if ( root->r.r_active ) {
7151 return 0;
7152 }
7153 hot_team = root->r.r_hot_team;
7154 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7155 return hot_team->t.t_nproc - 1; // Don't count master thread
7156 }
7157
7158 //
7159 // Skip the master thread - it is accounted for elsewhere.
7160 //
7161 retval = 0;
7162 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7163 if ( hot_team->t.t_threads[i]->th.th_active ) {
7164 retval++;
7165 }
7166 }
7167 return retval;
7168}
7169
7170//
7171// Perform an automatic adjustment to the number of
7172// threads used by the next parallel region.
7173//
7174static int
7175__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7176{
7177 int retval;
7178 int pool_active;
7179 int hot_team_active;
7180 int team_curr_active;
7181 int system_active;
7182
7183 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7184 root, set_nproc ) );
7185 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007186 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007187 KMP_DEBUG_ASSERT( set_nproc > 1 );
7188
7189 if ( set_nproc == 1) {
7190 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7191 return 1;
7192 }
7193
7194 //
7195 // Threads that are active in the thread pool, active in the hot team
7196 // for this particular root (if we are at the outer par level), and
7197 // the currently executing thread (to become the master) are available
7198 // to add to the new team, but are currently contributing to the system
7199 // load, and must be accounted for.
7200 //
7201 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7202 hot_team_active = __kmp_active_hot_team_nproc( root );
7203 team_curr_active = pool_active + hot_team_active + 1;
7204
7205 //
7206 // Check the system load.
7207 //
7208 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7209 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7210 system_active, pool_active, hot_team_active ) );
7211
7212 if ( system_active < 0 ) {
7213 //
7214 // There was an error reading the necessary info from /proc,
7215 // so use the thread limit algorithm instead. Once we set
7216 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7217 // we shouldn't wind up getting back here.
7218 //
7219 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7220 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7221
7222 //
7223 // Make this call behave like the thread limit algorithm.
7224 //
7225 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7226 : root->r.r_hot_team->t.t_nproc);
7227 if ( retval > set_nproc ) {
7228 retval = set_nproc;
7229 }
7230 if ( retval < KMP_MIN_NTH ) {
7231 retval = KMP_MIN_NTH;
7232 }
7233
7234 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7235 return retval;
7236 }
7237
7238 //
7239 // There is a slight delay in the load balance algorithm in detecting
7240 // new running procs. The real system load at this instant should be
7241 // at least as large as the #active omp thread that are available to
7242 // add to the team.
7243 //
7244 if ( system_active < team_curr_active ) {
7245 system_active = team_curr_active;
7246 }
7247 retval = __kmp_avail_proc - system_active + team_curr_active;
7248 if ( retval > set_nproc ) {
7249 retval = set_nproc;
7250 }
7251 if ( retval < KMP_MIN_NTH ) {
7252 retval = KMP_MIN_NTH;
7253 }
7254
7255 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7256 return retval;
7257} // __kmp_load_balance_nproc()
7258
7259#endif /* USE_LOAD_BALANCE */
7260
Jim Cownie5e8470a2013-09-27 10:38:44 +00007261/* ------------------------------------------------------------------------ */
7262/* ------------------------------------------------------------------------ */
7263
7264/* NOTE: this is called with the __kmp_init_lock held */
7265void
7266__kmp_cleanup( void )
7267{
7268 int f;
7269
7270 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7271
7272 if (TCR_4(__kmp_init_parallel)) {
7273#if KMP_HANDLE_SIGNALS
7274 __kmp_remove_signals();
7275#endif
7276 TCW_4(__kmp_init_parallel, FALSE);
7277 }
7278
7279 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007280#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007281 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007282#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007283 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007284 TCW_4(__kmp_init_middle, FALSE);
7285 }
7286
7287 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7288
7289 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007290 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007291 __kmp_init_serial = FALSE;
7292 }
7293
7294 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7295 if ( __kmp_root[ f ] != NULL ) {
7296 __kmp_free( __kmp_root[ f ] );
7297 __kmp_root[ f ] = NULL;
7298 }
7299 }
7300 __kmp_free( __kmp_threads );
 7301    // __kmp_threads and __kmp_root were allocated at once, as a single block, so there is
 7302    // no need to free __kmp_root separately.
7303 __kmp_threads = NULL;
7304 __kmp_root = NULL;
7305 __kmp_threads_capacity = 0;
7306
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007307#if KMP_USE_DYNAMIC_LOCK
7308 __kmp_cleanup_indirect_user_locks();
7309#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007310 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007311#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007312
Alp Toker98758b02014-03-02 04:12:06 +00007313 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007314 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7315 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007316 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007317
7318 #if KMP_USE_ADAPTIVE_LOCKS
7319 #if KMP_DEBUG_ADAPTIVE_LOCKS
7320 __kmp_print_speculative_stats();
7321 #endif
7322 #endif
7323 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7324 __kmp_nested_nth.nth = NULL;
7325 __kmp_nested_nth.size = 0;
7326 __kmp_nested_nth.used = 0;
Jonathan Peytond0365a22017-01-18 06:40:19 +00007327 KMP_INTERNAL_FREE( __kmp_nested_proc_bind.bind_types );
7328 __kmp_nested_proc_bind.bind_types = NULL;
7329 __kmp_nested_proc_bind.size = 0;
7330 __kmp_nested_proc_bind.used = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007331
7332 __kmp_i18n_catclose();
7333
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007334#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00007335 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007336#endif
7337
Jim Cownie5e8470a2013-09-27 10:38:44 +00007338 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7339}
7340
7341/* ------------------------------------------------------------------------ */
7342/* ------------------------------------------------------------------------ */
7343
7344int
7345__kmp_ignore_mppbeg( void )
7346{
7347 char *env;
7348
7349 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7350 if (__kmp_str_match_false( env ))
7351 return FALSE;
7352 }
7353 // By default __kmpc_begin() is no-op.
7354 return TRUE;
7355}
7356
7357int
7358__kmp_ignore_mppend( void )
7359{
7360 char *env;
7361
7362 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7363 if (__kmp_str_match_false( env ))
7364 return FALSE;
7365 }
7366 // By default __kmpc_end() is no-op.
7367 return TRUE;
7368}
7369
7370void
7371__kmp_internal_begin( void )
7372{
7373 int gtid;
7374 kmp_root_t *root;
7375
7376 /* this is a very important step as it will register new sibling threads
7377 * and assign these new uber threads a new gtid */
7378 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007379 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007380 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7381
7382 if( root->r.r_begin ) return;
7383 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7384 if( root->r.r_begin ) {
7385 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7386 return;
7387 }
7388
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007389 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007390
7391 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7392}
7393
7394
7395/* ------------------------------------------------------------------------ */
7396/* ------------------------------------------------------------------------ */
7397
7398void
7399__kmp_user_set_library (enum library_type arg)
7400{
7401 int gtid;
7402 kmp_root_t *root;
7403 kmp_info_t *thread;
7404
7405 /* first, make sure we are initialized so we can get our gtid */
7406
7407 gtid = __kmp_entry_gtid();
7408 thread = __kmp_threads[ gtid ];
7409
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007410 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007411
7412 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7413 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7414 KMP_WARNING( SetLibraryIncorrectCall );
7415 return;
7416 }
7417
7418 switch ( arg ) {
7419 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007420 thread->th.th_set_nproc = 0;
7421 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007422 break;
7423 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007424 thread->th.th_set_nproc = 0;
7425 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007426 break;
7427 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007428 thread->th.th_set_nproc = 0;
7429 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007430 break;
7431 default:
7432 KMP_FATAL( UnknownLibraryType, arg );
7433 }
7434
7435 __kmp_aux_set_library ( arg );
7436}
7437
7438void
7439__kmp_aux_set_stacksize( size_t arg )
7440{
7441 if (! __kmp_init_serial)
7442 __kmp_serial_initialize();
7443
7444#if KMP_OS_DARWIN
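    /* Round the requested size up to a 0x1000-byte (4 KB) boundary; the (arg + 0x1000)
       check guards against the addition wrapping around zero when rounding up. */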
7445 if (arg & (0x1000 - 1)) {
7446 arg &= ~(0x1000 - 1);
7447 if(arg + 0x1000) /* check for overflow if we round up */
7448 arg += 0x1000;
7449 }
7450#endif
7451 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7452
7453 /* only change the default stacksize before the first parallel region */
7454 if (! TCR_4(__kmp_init_parallel)) {
7455 size_t value = arg; /* argument is in bytes */
7456
7457 if (value < __kmp_sys_min_stksize )
7458 value = __kmp_sys_min_stksize ;
7459 else if (value > KMP_MAX_STKSIZE)
7460 value = KMP_MAX_STKSIZE;
7461
7462 __kmp_stksize = value;
7463
7464 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7465 }
7466
7467 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7468}
7469
7470/* set the behaviour of the runtime library */
7471/* TODO this can cause some odd behaviour with sibling parallelism... */
7472void
7473__kmp_aux_set_library (enum library_type arg)
7474{
7475 __kmp_library = arg;
7476
7477 switch ( __kmp_library ) {
7478 case library_serial :
7479 {
7480 KMP_INFORM( LibraryIsSerial );
7481 (void) __kmp_change_library( TRUE );
7482 }
7483 break;
7484 case library_turnaround :
7485 (void) __kmp_change_library( TRUE );
7486 break;
7487 case library_throughput :
7488 (void) __kmp_change_library( FALSE );
7489 break;
7490 default:
7491 KMP_FATAL( UnknownLibraryType, arg );
7492 }
7493}
7494
7495/* ------------------------------------------------------------------------ */
7496/* ------------------------------------------------------------------------ */
7497
7498void
7499__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7500{
7501 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007502#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007503 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007504#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007505 int bt_set;
7506
7507 __kmp_save_internal_controls( thread );
7508
7509 /* Normalize and set blocktime for the teams */
7510 if (blocktime < KMP_MIN_BLOCKTIME)
7511 blocktime = KMP_MIN_BLOCKTIME;
7512 else if (blocktime > KMP_MAX_BLOCKTIME)
7513 blocktime = KMP_MAX_BLOCKTIME;
7514
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007515 set__blocktime_team( thread->th.th_team, tid, blocktime );
7516 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007517
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007518#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007519 /* Calculate and set blocktime intervals for the teams */
7520 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7521
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007522 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7523 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007524#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007525
7526 /* Set whether blocktime has been set to "TRUE" */
7527 bt_set = TRUE;
7528
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007529 set__bt_set_team( thread->th.th_team, tid, bt_set );
7530 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007531#if KMP_USE_MONITOR
Samuel Antao33515192016-10-20 13:20:17 +00007532 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
7533 "bt_intervals=%d, monitor_updates=%d\n",
7534 __kmp_gtid_from_tid(tid, thread->th.th_team),
7535 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7536 __kmp_monitor_wakeups));
7537#else
7538 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7539 __kmp_gtid_from_tid(tid, thread->th.th_team),
7540 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007541#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007542}
7543
7544void
7545__kmp_aux_set_defaults(
7546 char const * str,
7547 int len
7548) {
7549 if ( ! __kmp_init_serial ) {
7550 __kmp_serial_initialize();
7551 };
7552 __kmp_env_initialize( str );
7553
7554 if (__kmp_settings
7555#if OMP_40_ENABLED
7556 || __kmp_display_env || __kmp_display_env_verbose
7557#endif // OMP_40_ENABLED
7558 ) {
7559 __kmp_env_print();
7560 }
7561} // __kmp_aux_set_defaults
7562
7563/* ------------------------------------------------------------------------ */
7564
7565/*
7566 * internal fast reduction routines
7567 */
7568
Jim Cownie5e8470a2013-09-27 10:38:44 +00007569PACKED_REDUCTION_METHOD_T
7570__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7571 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7572 kmp_critical_name *lck )
7573{
7574
7575 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7576 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7577 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
 7578    // Finally, it's up to the OpenMP RTL to decide which method to select among those generated by PAROPT.
7579
7580 PACKED_REDUCTION_METHOD_T retval;
7581
7582 int team_size;
7583
7584 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7585 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7586
7587 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7588 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7589
7590 retval = critical_reduce_block;
7591
 7592    team_size = __kmp_get_team_num_threads( global_tid ); // another way of getting the team size ( with 1 dynamic dereference ) is slower
7593
7594 if( team_size == 1 ) {
7595
7596 retval = empty_reduce_block;
7597
7598 } else {
7599
7600 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7601 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7602
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007603 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007604
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007605 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007606
7607 int teamsize_cutoff = 4;
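            // Team sizes up to this cutoff prefer the atomic method (when available);
            // larger teams use the tree method with the reduction barrier. The cutoff is
            // raised for Xeon Phi below.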
7608
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007609#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7610 if( __kmp_mic_type != non_mic ) {
7611 teamsize_cutoff = 8;
7612 }
7613#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007614 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007615 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007616 if ( atomic_available ) {
7617 retval = atomic_reduce_block;
7618 }
7619 } else {
7620 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7621 }
7622 } else if ( atomic_available ) {
7623 retval = atomic_reduce_block;
7624 }
7625 #else
7626 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007627 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007628
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007629 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007630
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007631 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007632
Jim Cownie5e8470a2013-09-27 10:38:44 +00007633 // basic tuning
7634
7635 if( atomic_available ) {
7636 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7637 retval = atomic_reduce_block;
7638 }
7639 } // otherwise: use critical section
7640
7641 #elif KMP_OS_DARWIN
7642
Jim Cownie5e8470a2013-09-27 10:38:44 +00007643 if( atomic_available && ( num_vars <= 3 ) ) {
7644 retval = atomic_reduce_block;
7645 } else if( tree_available ) {
7646 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7647 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7648 }
7649 } // otherwise: use critical section
7650
7651 #else
7652 #error "Unknown or unsupported OS"
7653 #endif
7654
7655 #else
7656 #error "Unknown or unsupported architecture"
7657 #endif
7658
7659 }
7660
Jim Cownie5e8470a2013-09-27 10:38:44 +00007661 // KMP_FORCE_REDUCTION
7662
Andrey Churbanovec23a952015-08-17 10:12:12 +00007663 // If the team is serialized (team_size == 1), ignore the forced reduction
7664 // method and stay with the unsynchronized method (empty_reduce_block)
7665 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007666
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007667 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007668
7669 int atomic_available, tree_available;
7670
7671 switch( ( forced_retval = __kmp_force_reduction_method ) )
7672 {
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007673 case critical_reduce_block:
Jim Cownie5e8470a2013-09-27 10:38:44 +00007674 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007675 break;
7676
7677 case atomic_reduce_block:
7678 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007679 if( ! atomic_available ) {
7680 KMP_WARNING(RedMethodNotSupported, "atomic");
7681 forced_retval = critical_reduce_block;
7682 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007683 break;
7684
7685 case tree_reduce_block:
7686 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007687 if( ! tree_available ) {
7688 KMP_WARNING(RedMethodNotSupported, "tree");
7689 forced_retval = critical_reduce_block;
7690 } else {
7691 #if KMP_FAST_REDUCTION_BARRIER
7692 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7693 #endif
7694 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007695 break;
7696
7697 default:
7698 KMP_ASSERT( 0 ); // "unsupported method specified"
7699 }
7700
7701 retval = forced_retval;
7702 }
7703
7704 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7705
7706 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7707 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7708
7709 return ( retval );
7710}
7711
7712// this function is for testing set/get/determine reduce method
7713kmp_int32
7714__kmp_get_reduce_method( void ) {
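    // The packed value keeps the reduction method in its upper bits (the low bits carry the
    // barrier type used by tree reductions), hence the shift by 8 to expose the bare method.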
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007715 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007716}
7717
7718/* ------------------------------------------------------------------------ */