/*
 * kmp_runtime.c -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif


#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread.                */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */

int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t   **other_threads;
    size_t         stack_data;
    char          *stack_addr;
    size_t         stack_size;
    char          *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
             parallel region, made it return KMP_GTID_DNE to force serial_initialize by
             caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
             __kmp_init_gtid for this to work. */

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    /*
        ATT: The code below is a source of potential bugs due to unsynchronized access to
        __kmp_threads array. For example:
        1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
        2. Current thread is suspended by OS.
        3. Another thread unregisters and finishes (debug versions of free() may fill memory
           with something like 0xEF).
        4. Current thread is resumed.
        5. Current thread reads junk from *thr.
        TODO: Fix it.
        --ln
    */

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size =  (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base =  (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated */
                /* stack size is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

    /* if we haven't been assigned a gtid, then return code */
    if( i<0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
          other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}

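/* Like __kmp_get_global_thread_id(), but if the calling thread does not yet
   have a gtid (it is a new root), register it, running serial initialization
   first if needed. */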
int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
        /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
    }

    KMP_DEBUG_ASSERT( gtid >=0 );

    return gtid;
}

/* caller must hold forkjoin_lock */
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                            (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                            "th_%d stack (overlapped)",
                            __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}


/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */

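/* Yield in a loop until `done` is set (which never happens during normal
   execution); the abort paths below use this to park a thread that must not
   return. */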
void
__kmp_infinite_loop( void )
{
    static int done = FALSE;

    while (! done) {
        KMP_YIELD( 1 );
    }
}

#define MAX_MESSAGE 512

void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];
    va_list ap;

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    int node;
    if(gtid >= 0) {
        if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
            if( __kmp_storage_map_verbose ) {
                node = __kmp_get_host_node(p1);
                if(node < 0)  /* doesn't work, so don't try this next time */
                    __kmp_storage_map_verbose = FALSE;
                else {
                    char *last;
                    int lastNode;
                    int localProc = __kmp_get_cpu_from_gtid(gtid);

                    p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
                    p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
                    if(localProc >= 0)
                        __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid, localProc>>1);
                    else
                        __kmp_printf_no_lock("  GTID %d\n", gtid);
# if KMP_USE_PRCTL
/* The more elaborate format is disabled for now because of the prctl hanging bug. */
                    do {
                        last = p1;
                        lastNode = node;
                        /* This loop collates adjacent pages with the same host node. */
                        do {
                            (char*)p1 += PAGE_SIZE;
                        } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
                                             (char*)p1 - 1, lastNode);
                    } while(p1 <= p2);
# else
                    __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                                         (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
                    if(p1 < p2)  {
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                             (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
                    }
# endif
                }
            }
        } else
            __kmp_printf_no_lock("  %s\n", KMP_I18N_STR( StorageMapWarning ) );
    }
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}

void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}

void
__kmp_abort_process()
{

    // Later threads may stall here, but that's ok because abort() will kill them.
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }; // if

    if ( KMP_OS_WINDOWS ) {
        // Let other threads know of abnormal termination and prevent deadlock
        // if abort happened during library initialization or shutdown
        __kmp_global.g.g_abort = SIGABRT;

        /*
            On Windows* OS by default abort() causes pop-up error box, which stalls nightly testing.
            Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
            works well, but this function is not available in VS7 (this is not problem for DLL, but
            it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does
            not help, at least in some versions of MS C RTL.

            It seems following sequence is the only way to simulate abort() and avoid pop-up error
            box.
        */
        raise( SIGABRT );
        _exit( 3 );    // Just in case, if signal ignored, exit anyway.
    } else {
        abort();
    }; // if

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

} // __kmp_abort_process

void
__kmp_abort_thread( void )
{
    // TODO: Eliminate g_abort global variable and this function.
    // In case of abort just call abort(), it will kill all the threads.
    __kmp_infinite_loop();
} // __kmp_abort_thread

/* ------------------------------------------------------------------------ */

/*
 * Print out the storage map for the major kmp_info_t thread data structures
 * that are allocated together.
 */

static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
    #endif // KMP_FAST_REDUCTION_BARRIER
}

/*
 * Print out the storage map for the major kmp_team_t team data structures
 * that are allocated together.
 */

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
    #endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    // TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // The OS terminates other threads (except the one calling ProcessExit or FreeLibrary),
    // so it might seem safe to access __kmp_threads[] without taking the forkjoin_lock.
    // In fact, however, some threads can still be alive here, even though they are about
    // to be terminated. The threads in the array with ds_thread==0 are the most suspect,
    // so accessing __kmp_threads[] may not actually be safe.

    // TODO: does it make sense to check __kmp_roots[] ?

    // Let's check that there are no other alive threads registered with the OMP lib.
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Assume that I'm alone.

    // Now it is probably safe to check and reset the locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
    #ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
    #endif // KMP_DEBUG
}

BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
    //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));

            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
            {
                // lpReserved is used for telling the difference:
                //   lpReserved == NULL when FreeLibrary() was called,
                //   lpReserved != NULL when the process terminates.
                // When FreeLibrary() is called, worker threads remain alive.
                // So they will release the forkjoin lock by themselves.
                // When the process terminates, worker threads disappear triggering
                // the problem of unreleased forkjoin lock as described below.

                // A worker thread can take the forkjoin lock.
                // The problem comes up if that worker thread becomes dead
                // before it releases the forkjoin lock.
                // The forkjoin lock remains taken, while the thread
                // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
                // will try to take the forkjoin lock and will always fail,
                // so that the application will never finish [normally].
                // This scenario is possible if __kmpc_end() has not been executed.
                // It looks like it's not a corner case, but common cases:
                // - the main function was compiled by an alternative compiler;
                // - the main function was compiled by icl but without /Qopenmp (application with plugins);
                // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
                // - alive foreign thread prevented __kmpc_end from doing cleanup.

                // This is a hack to work around the problem.
                // TODO: !!! to figure out something better.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));

            /* if we wanted to register new siblings all the time here call
             * __kmp_get_gtid(); */
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */


/* ------------------------------------------------------------------------ */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  // check whether KMP_LIBRARY=throughput (even init count)

    if (status) {
        __kmp_yield_init |= 1;  // throughput => turnaround (odd init count)
    }
    else {
        __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
    }

    return old_status;  // return previous setting of whether KMP_LIBRARY=throughput
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* __kmp_parallel_deo --
 * Wait until it's our turn.
 */
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_MB();
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
        KMP_MB();
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo --
 * Signal the next task.
 */

void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid =  __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        KMP_MB();       /* Flush all pending memory write invalidates.  */

        /* use the tid of the next thread in this team */
        /* TODO: replace with a general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB();       /* Flush all pending memory write invalidates.  */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* The BARRIER for a SINGLE process section is always explicit   */

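/* Returns nonzero if the calling thread should execute the SINGLE block
   (i.e. the team is serialized, or this thread won the race on t_construct),
   zero otherwise. */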
int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the
           single block
        */
        /* TODO: Should this be acquire or release? */
        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);
        }
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        {   // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

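/* Mark the end of a SINGLE block begun with __kmp_enter_single(): finish ITT
   tracking and pop the workshare frame used by the consistency checker. */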
void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}


/*
 * determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller.
 */
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
   int master_tid, int set_nthreads
#if OMP_40_ENABLED
  , int enter_teams
#endif /* OMP_40_ENABLED */
)
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    //
    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    //
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
          : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
        else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                  master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                  master_tid, new_nthreads ));
            }
        }
    }
    else {
        KMP_ASSERT( 0 );
    }

    //
    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    //
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        //
        // If dyn-var is false, emit a 1-time warning.
        //
        if ( ! get__dynamic_2( parent_team, master_tid )
          && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
          master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    //
    // Check if the threads array is large enough, or needs expanding.
    //
    // See comment in __kmp_register_root() about the adjustment if
    // __kmp_threads[0] == NULL.
    //
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > capacity ) {
        //
        // Expand the threads array.
        //
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            //
            // The threads array was not expanded enough.
            //
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            //
            // If dyn-var is false, emit a 1-time warning.
            //
            if ( ! get__dynamic_2( parent_team, master_tid )
              && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
                else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within critical section forkjoin */

static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int         i;
    int         use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
    KMP_MB();

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) {  // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1; // index in array of hot teams
        if( master_th->th.th_teams_microtask ) {    // are we inside the teams?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level; // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level; // level was not increased in teams construct for team_of_workers before the parallel
            }            // team->t.t_level will be increased inside parallel
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1; // the team is ready to use
            } else {
                use_hot_team = 0; // AC: threads are not allocated yet
                hot_teams[level].hot_team = team; // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        } else {
            use_hot_team = 0;
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ;  i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;
#endif
            { // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived        = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }; // for b
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif

    }

    KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
//
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team structure,
// so we don't make changes unless they are needed.
//
inline static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        // There is no point looking at t_fp_control_saved here.
        // If it is TRUE, we still have to update the values if they are different from those we now have.
        // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
        // that the values in the team are the same as those we have.
        // So, this code achieves what we need whether or not t_fp_control_saved is true.
        // By checking whether the value needs updating we avoid unnecessary writes that would put the
        // cache-line into a written state, causing all threads in the team to have to read it again.
        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
        // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
        // So we must ensure it is correct.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    }
    else {
        // Similarly here. Don't write to this cache-line in the team structure unless we have to.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
    }
}

// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed in the team
        // during the parallel region that we are exiting.
        //
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x)  ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration

/*
 * Run a parallel region that has been serialized, so runs only in a team of the single master thread.
 */
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    /* Skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr     = __kmp_threads[ global_tid ];
    serial_team  = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        //
        // No proc_bind clause was specified, so use the current value
        // of proc-bind-var for this parallel region.
        //
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    //
    // Reset for next parallel region
    //
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used
             * TODO increase performance by making this locks more specific */
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                           ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                           proc_bind,
#endif
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );


            /* TODO the above breaks the requirement that if we run out of
             * resources, then we can still guarantee that serialized teams
             * are ok, since we may need to allocate a new one */
        } else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident         = loc;
        serial_team->t.t_serialized    = 1;
        serial_team->t.t_nproc         = 1;
        serial_team->t.t_parent        = this_thr->th.th_team;
        serial_team->t.t_sched         = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team           = serial_team;
        serial_team->t.t_master_tid    = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
1282 global_tid, this_thr->th.th_current_task ) );
1283 KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
1284 this_thr->th.th_current_task->td_flags.executing = 0;
1285
1286 __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
1287
1288 /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
1289 each serialized task represented by team->t.t_serialized? */
1290 copy_icvs(
1291 & this_thr->th.th_current_task->td_icvs,
1292 & this_thr->th.th_current_task->td_parent->td_icvs );
1293
1294 // Thread value exists in the nested nthreads array for the next nested level
1295 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1296 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1297 }
1298
1299#if OMP_40_ENABLED
1300 if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
1301 this_thr->th.th_current_task->td_icvs.proc_bind
1302 = __kmp_nested_proc_bind.bind_types[ level + 1 ];
1303 }
1304#endif /* OMP_40_ENABLED */
1305
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001306#if USE_DEBUGGER
1307 serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
1308#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001309 this_thr->th.th_info.ds.ds_tid = 0;
1310
1311 /* set thread cache values */
1312 this_thr->th.th_team_nproc = 1;
1313 this_thr->th.th_team_master = this_thr;
1314 this_thr->th.th_team_serialized = 1;
1315
1316 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1317 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1318
1319 propagateFPControl (serial_team);
1320
1321 /* check if we need to allocate dispatch buffers stack */
1322 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1323 if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
1324 serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
1325 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1326 }
1327 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1328
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001329#if OMPT_SUPPORT
1330 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1331 __ompt_team_assign_id(serial_team, ompt_parallel_id);
1332#endif
1333
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001334 KMP_MB();
1335
1336 } else {
1337 /* this serialized team is already being used,
1338 * that's fine, just add another nested level */
1339 KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
1340 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1341 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1342 ++ serial_team->t.t_serialized;
1343 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1344
1345 // Nested level will be an index in the nested nthreads array
1346 int level = this_thr->th.th_team->t.t_level;
1347 // Thread value exists in the nested nthreads array for the next nested level
1348 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1349 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1350 }
1351 serial_team->t.t_level++;
1352 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
1353 global_tid, serial_team, serial_team->t.t_level ) );
1354
1355 /* allocate/push dispatch buffers stack */
1356 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1357 {
1358 dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
1359 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1360 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1361 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1362 }
1363 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1364
1365 KMP_MB();
1366 }
1367
1368 if ( __kmp_env_consistency_check )
1369 __kmp_push_parallel( global_tid, NULL );
1370
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001371}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001372
Jim Cownie5e8470a2013-09-27 10:38:44 +00001373/* most of the work for a fork */
1374/* return true if we really went parallel, false if serialized */
1375int
1376__kmp_fork_call(
1377 ident_t * loc,
1378 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001379 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001380 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001381#if OMPT_SUPPORT
1382 void *unwrapped_task,
1383#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001384 microtask_t microtask,
1385 launch_t invoker,
1386/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001387#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001388 va_list * ap
1389#else
1390 va_list ap
1391#endif
1392 )
1393{
1394 void **argv;
1395 int i;
1396 int master_tid;
1397 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001398 kmp_team_t *team;
1399 kmp_team_t *parent_team;
1400 kmp_info_t *master_th;
1401 kmp_root_t *root;
1402 int nthreads;
1403 int master_active;
1404 int master_set_numthreads;
1405 int level;
1406#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001407 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001408 int teams_level;
1409#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001410#if KMP_NESTED_HOT_TEAMS
1411 kmp_hot_team_ptr_t **p_hot_teams;
1412#endif
1413 { // KMP_TIME_BLOCK
Jonathan Peyton45be4502015-08-11 21:36:41 +00001414 KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
1415 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001416
1417 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001418 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
1419 /* Some systems prefer the stack for the root thread(s) to start with */
1420 /* some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001421 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001422 /* These 2 lines below are so this does not get optimized out */
1423 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1424 __kmp_stkpadding += (short)((kmp_int64)dummy);
1425 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001426
1427 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001428 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001429 if( ! TCR_4(__kmp_init_parallel) )
1430 __kmp_parallel_initialize();
1431
1432 /* set up current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001433 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1434 parent_team = master_th->th.th_team;
1435 master_tid = master_th->th.th_info.ds.ds_tid;
1436 master_this_cons = master_th->th.th_local.this_construct;
1437 root = master_th->th.th_root;
1438 master_active = root->r.r_active;
1439 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001440
1441#if OMPT_SUPPORT
1442 ompt_parallel_id_t ompt_parallel_id;
1443 ompt_task_id_t ompt_task_id;
1444 ompt_frame_t *ompt_frame;
1445 ompt_task_id_t my_task_id;
1446 ompt_parallel_id_t my_parallel_id;
1447
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001448 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001449 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1450 ompt_task_id = __ompt_get_task_id_internal(0);
1451 ompt_frame = __ompt_get_task_frame_internal(0);
1452 }
1453#endif
1454
Jim Cownie5e8470a2013-09-27 10:38:44 +00001455 // Nested level will be an index in the nested nthreads array
1456 level = parent_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001457 active_level = parent_team->t.t_active_level; // used to launch non-serial teams even if nesting is not allowed
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001458#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001459 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1460#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001461#if KMP_NESTED_HOT_TEAMS
1462 p_hot_teams = &master_th->th.th_hot_teams;
1463 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1464 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1465 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1466 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1467 (*p_hot_teams)[0].hot_team_nth = 1; // the value is either the actual thread count or not needed (when active_level > 0)
1468 }
1469#endif
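    // Note: th_hot_teams caches one hot team per nesting level (up to __kmp_hot_teams_max_level)
    // so that nested parallel regions can reuse their threads instead of re-creating them on
    // every fork.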
Jim Cownie5e8470a2013-09-27 10:38:44 +00001470
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001471#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001472 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001473 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1474 int team_size = master_set_numthreads;
1475
1476 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1477 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001478 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001479 }
1480#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001481
Jim Cownie5e8470a2013-09-27 10:38:44 +00001482 master_th->th.th_ident = loc;
1483
1484#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001485 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001486 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
1487 // AC: This is the start of a parallel region nested inside a teams construct.
1488 // The team is already allocated (hot); all workers are waiting at the fork barrier.
1489 // No lock is needed to do a little team initialization and then release the workers.
1490 parent_team->t.t_ident = loc;
Jonathan Peyton7cf08d42016-06-16 18:47:38 +00001491 __kmp_alloc_argv_entries( argc, parent_team, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492 parent_team->t.t_argc = argc;
1493 argv = (void**)parent_team->t.t_argv;
1494 for( i=argc-1; i >= 0; --i )
1495/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001496#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001497 *argv++ = va_arg( *ap, void * );
1498#else
1499 *argv++ = va_arg( ap, void * );
1500#endif
1501 /* Increment our nested depth level, but do not increase the serialization */
1502 if ( parent_team == master_th->th.th_serial_team ) {
1503 // AC: we are in serialized parallel
1504 __kmpc_serialized_parallel(loc, gtid);
1505 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
1506 parent_team->t.t_serialized--; // AC: need this so that enquiry functions
1507 // work correctly; it will be restored at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001508
1509#if OMPT_SUPPORT
1510 void *dummy;
1511 void **exit_runtime_p;
1512
1513 ompt_lw_taskteam_t lw_taskteam;
1514
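            // OMPT represents this serialized region with a lightweight task team linked onto
            // the master thread (no separate kmp_team_t is allocated here).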
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001515 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001516 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1517 unwrapped_task, ompt_parallel_id);
1518 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1519 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1520
1521 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1522
1523#if OMPT_TRACE
1524 /* OMPT implicit task begin */
1525 my_task_id = lw_taskteam.ompt_task_info.task_id;
1526 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001527 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001528 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1529 my_parallel_id, my_task_id);
1530 }
1531#endif
1532
1533 /* OMPT state */
1534 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1535 } else {
1536 exit_runtime_p = &dummy;
1537 }
1538#endif
1539
Jonathan Peyton45be4502015-08-11 21:36:41 +00001540 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001541 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1542 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001543 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001544#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001545 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001546#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001547 );
1548 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001549
1550#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001551 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001552#if OMPT_TRACE
1553 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1554
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001555 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001556 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1557 ompt_parallel_id, ompt_task_id);
1558 }
1559
1560 __ompt_lw_taskteam_unlink(master_th);
1561 // clear the task id only after unlinking the task
1562 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1563#endif
1564
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001565 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001566 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001567 ompt_parallel_id, ompt_task_id,
1568 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001569 }
1570 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1571 }
1572#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001573 return TRUE;
1574 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001575
Jim Cownie5e8470a2013-09-27 10:38:44 +00001576 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001577#if OMPT_SUPPORT
1578 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1579#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001580 parent_team->t.t_invoke = invoker;
1581 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1582 parent_team->t.t_active_level ++;
1583 parent_team->t.t_level ++;
1584
1585 /* Change number of threads in the team if requested */
1586 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001587 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001588 // AC: we can only reduce the number of threads dynamically, we cannot increase it
1589 kmp_info_t **other_threads = parent_team->t.t_threads;
1590 parent_team->t.t_nproc = master_set_numthreads;
1591 for ( i = 0; i < master_set_numthreads; ++i ) {
1592 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1593 }
1594 // Keep extra threads hot in the team for possible subsequent parallel regions
1595 }
1596 master_th->th.th_set_nproc = 0;
1597 }
1598
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001599#if USE_DEBUGGER
1600 if ( __kmp_debugging ) { // Let debugger override number of threads.
1601 int nth = __kmp_omp_num_threads( loc );
1602 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1603 master_set_numthreads = nth;
1604 }; // if
1605 }; // if
1606#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001607
1608 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1609 __kmp_internal_fork( loc, gtid, parent_team );
1610 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1611
1612 /* Invoke microtask for MASTER thread */
1613 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1614 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1615
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001616 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001617 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1618 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001619 if (! parent_team->t.t_invoke( gtid )) {
1620 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1621 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001622 }
1623 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1624 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1625 KMP_MB(); /* Flush all pending memory write invalidates. */
1626
1627 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1628
1629 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001630 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001631#endif /* OMP_40_ENABLED */
1632
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001633#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001634 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001635 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001636 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001637#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001638
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
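        // The max-active-levels ICV has been reached, so this region must run with a single thread.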
1640 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001641 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001642#if OMP_40_ENABLED
1643 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1644#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001646 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001647
1648 // Check whether we need to take the forkjoin lock (not needed for a serialized parallel outside of a teams construct).
1649 // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallel regions.
1650 if (nthreads > 1) {
1651 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1652#if OMP_40_ENABLED
1653 && !enter_teams
1654#endif /* OMP_40_ENABLED */
1655 ) ) || ( __kmp_library == library_serial ) ) {
1656 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1657 gtid, nthreads ));
1658 nthreads = 1;
1659 }
1660 }
1661 if ( nthreads > 1 ) {
1662 /* determine how many new threads we can use */
1663 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1664
1665 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001666#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001667/* AC: If we execute teams from a parallel region (on the host), then the teams should be created,
1668 but each can only have 1 thread if nesting is disabled. If teams is called from a serial region,
1669 then the teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001670 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001671#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001672 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001673 if ( nthreads == 1 ) {
1674 // Free the lock for single-threaded execution here;
1675 // for multi-threaded execution it will be freed later,
1676 // after the team of threads has been created and initialized
1677 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1678 }
1679 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001680 }
1681 KMP_DEBUG_ASSERT( nthreads > 0 );
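    // At this point nthreads is the final size of the new team; a value of 1 means the region
    // is executed serially by the master thread (handled just below).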
1682
1683 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001684 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001685
Jim Cownie5e8470a2013-09-27 10:38:44 +00001686 /* create a serialized parallel region? */
1687 if ( nthreads == 1 ) {
1688 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001689#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001690 void * args[ argc ];
1691#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001692 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001693#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001694
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1696
1697 __kmpc_serialized_parallel(loc, gtid);
1698
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001699 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001700 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001701 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001702#if OMP_40_ENABLED
1703 if ( !ap ) {
1704 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001705 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001706 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001707
1708#if OMPT_SUPPORT
1709 void *dummy;
1710 void **exit_runtime_p;
1711
1712 ompt_lw_taskteam_t lw_taskteam;
1713
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001714 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001715 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1716 unwrapped_task, ompt_parallel_id);
1717 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1718 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1719
1720 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1721
1722#if OMPT_TRACE
1723 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001724 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001725 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1726 ompt_parallel_id, my_task_id);
1727 }
1728#endif
1729
1730 /* OMPT state */
1731 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1732 } else {
1733 exit_runtime_p = &dummy;
1734 }
1735#endif
1736
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001737 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001738 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1739 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001740 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1741#if OMPT_SUPPORT
1742 , exit_runtime_p
1743#endif
1744 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001745 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001746
1747#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001748 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001749 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1750
1751#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001752 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001753 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1754 ompt_parallel_id, ompt_task_id);
1755 }
1756#endif
1757
1758 __ompt_lw_taskteam_unlink(master_th);
1759 // clear the task id only after unlinking the task
1760 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1761
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001762 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001763 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001764 ompt_parallel_id, ompt_task_id,
1765 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001766 }
1767 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1768 }
1769#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001770 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
1771 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1772 team = master_th->th.th_team;
1773 //team->t.t_pkfn = microtask;
1774 team->t.t_invoke = invoker;
1775 __kmp_alloc_argv_entries( argc, team, TRUE );
1776 team->t.t_argc = argc;
1777 argv = (void**) team->t.t_argv;
1778 if ( ap ) {
1779 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001780// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001781# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001782 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001783# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001784 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001785# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001786 } else {
1787 for( i=0; i < argc; ++i )
1788 // Get args from parent team for teams construct
1789 argv[i] = parent_team->t.t_argv[i];
1790 }
1791 // AC: revert change made in __kmpc_serialized_parallel()
1792 // because the initial code in teams should have level=0
1793 team->t.t_level--;
1794 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001795 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001796 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1797 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001798 invoker(gtid);
1799 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001800 } else {
1801#endif /* OMP_40_ENABLED */
1802 argv = args;
1803 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001804// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001805#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001806 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001807#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001808 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001809#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001811
1812#if OMPT_SUPPORT
1813 void *dummy;
1814 void **exit_runtime_p;
1815
1816 ompt_lw_taskteam_t lw_taskteam;
1817
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001818 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001819 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1820 unwrapped_task, ompt_parallel_id);
1821 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1822 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1823
1824 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1825
1826#if OMPT_TRACE
1827 /* OMPT implicit task begin */
1828 my_task_id = lw_taskteam.ompt_task_info.task_id;
1829 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001830 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001831 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1832 my_parallel_id, my_task_id);
1833 }
1834#endif
1835
1836 /* OMPT state */
1837 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1838 } else {
1839 exit_runtime_p = &dummy;
1840 }
1841#endif
1842
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001843 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001844 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1845 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001846 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1847#if OMPT_SUPPORT
1848 , exit_runtime_p
1849#endif
1850 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001851 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001852
1853#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001854 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001855#if OMPT_TRACE
1856 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1857
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001858 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001859 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1860 my_parallel_id, my_task_id);
1861 }
1862#endif
1863
1864 __ompt_lw_taskteam_unlink(master_th);
1865 // clear the task id only after unlinking the task
1866 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1867
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001868 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001869 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001870 ompt_parallel_id, ompt_task_id,
1871 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001872 }
1873 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1874 }
1875#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001876#if OMP_40_ENABLED
1877 }
1878#endif /* OMP_40_ENABLED */
1879 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001880 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001881#if OMPT_SUPPORT
1882 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1883 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1884 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1885 unwrapped_task, ompt_parallel_id);
1886
1887 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1888 lwt->ompt_task_info.frame.exit_runtime_frame = 0;
1889 __ompt_lw_taskteam_link(lwt, master_th);
1890#endif
1891
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001892 // we were called from GNU native code
1893 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1894 return FALSE;
1895 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001896 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001897 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001898 }
1899
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001901 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001902 KMP_MB();
1903 return FALSE;
1904 }
1905
Jim Cownie5e8470a2013-09-27 10:38:44 +00001906 // GEH: only modify the executing flag in the case when not serialized
1907 // serialized case is handled in kmpc_serialized_parallel
1908 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001909 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1910 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1912 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1913 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001914
1915#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001916 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917#endif /* OMP_40_ENABLED */
1918 {
1919 /* Increment our nested depth level */
1920 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1921 }
1922
Jim Cownie5e8470a2013-09-27 10:38:44 +00001923 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001924 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001925 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1926 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001927 }
1928 else {
1929 nthreads_icv = 0; // don't update
1930 }
1931
1932#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001934 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001935 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001936 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1937 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 }
1939 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001940 if (proc_bind == proc_bind_default) {
1941 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1942 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001944 /* else: The proc_bind policy was specified explicitly on the parallel clause. This
1945 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001946 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001947 if ((level+1 < __kmp_nested_proc_bind.used)
1948 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1949 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001950 }
1951 }
1952
Jim Cownie5e8470a2013-09-27 10:38:44 +00001953 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001954 master_th->th.th_set_proc_bind = proc_bind_default;
1955#endif /* OMP_40_ENABLED */
1956
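    // Two allocation paths follow: if either the nproc ICV or the proc-bind ICV must change for
    // the new team, build a private copy of the master's ICVs with the new values and hand it to
    // __kmp_allocate_team; otherwise the master's current ICVs are passed through unchanged.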
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001957 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001959 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001960#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001961 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001963 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001965 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001966 new_icvs.nproc = nthreads_icv;
1967 }
1968
1969#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 new_icvs.proc_bind = proc_bind_icv;
1972 }
1973#endif /* OMP_40_ENABLED */
1974
1975 /* allocate a new parallel team */
1976 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1977 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001978#if OMPT_SUPPORT
1979 ompt_parallel_id,
1980#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001981#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001982 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001983#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001984 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1985 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001986 /* allocate a new parallel team */
1987 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1988 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001989#if OMPT_SUPPORT
1990 ompt_parallel_id,
1991#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001993 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001995 &master_th->th.th_current_task->td_icvs, argc
1996 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001998 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999
2000 /* set up the new team */
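    // The KMP_CHECK_UPDATE macros below only store a field when its value actually changes
    // (assuming the usual definition, roughly "if ((a) != (b)) (a) = (b)"), so that untouched
    // team fields do not dirty cache lines that other threads may be reading.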
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002001 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2002 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2003 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2004 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2005 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002006#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002007 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002008#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002009 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010 // TODO: parent_team->t.t_level == INT_MAX ???
2011#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002012 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002013#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002014 int new_level = parent_team->t.t_level + 1;
2015 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2016 new_level = parent_team->t.t_active_level + 1;
2017 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002018#if OMP_40_ENABLED
2019 } else {
2020 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002021 int new_level = parent_team->t.t_level;
2022 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2023 new_level = parent_team->t.t_active_level;
2024 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002025 }
2026#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002027 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton6b560f02016-07-01 17:54:32 +00002028 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002029 team->t.t_sched = new_sched; // set the master's schedule as the new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002031#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002032 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002033#endif
2034
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002035 // Update the floating point rounding in the team if required.
2036 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002037
2038 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002039 // Set the master's task team to the team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002040#if 0
2041 // Patch out an assertion that trips while the runtime seems to operate correctly.
2042 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002043 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002044#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002045 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002046 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002047 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002048
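        // Save the master's current task_state on its memo stack (growing the stack if full) so
        // it can be restored at join time; the state is then reset for the new team unless the
        // team is the nested hot team at this level, whose remembered state is reused.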
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002049 if ( active_level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002050 // Take a memo of master's task_state
2051 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2052 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002053 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2054 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002055 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002056 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002057 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2058 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2059 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002060 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2061 new_stack[i] = 0;
2062 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002063 old_stack = master_th->th.th_task_state_memo_stack;
2064 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002065 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002066 __kmp_free(old_stack);
2067 }
2068 // Store master's task_state on stack
2069 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2070 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002071#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002072 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
Jonathan Peyton54127982015-11-04 21:37:48 +00002073 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2074 }
2075 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002076#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002077 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002078#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002079 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002080#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002081 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002082#if !KMP_NESTED_HOT_TEAMS
2083 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2084#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002085 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002086
2087 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2088 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2089 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2090 ( team->t.t_master_tid == 0 &&
2091 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2092 KMP_MB();
2093
2094 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002095 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002096#if OMP_40_ENABLED
2097 if ( ap ) {
2098#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002099 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002100// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002101#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002102 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002103#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002104 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002105#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002106 KMP_CHECK_UPDATE(*argv, new_argv);
2107 argv++;
2108 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002109#if OMP_40_ENABLED
2110 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002111 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002112 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002113 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2114 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002115 }
2116#endif /* OMP_40_ENABLED */
2117
2118 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002119 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002120 if (!root->r.r_active) // Only do the assignment when needed, to avoid cache ping-pong
2121 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002122
2123 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002124 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002125
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002126#if OMPT_SUPPORT
2127 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2128#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002129
2130 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
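    // For the multi-threaded path the forkjoin lock taken during thread reservation is held
    // across team allocation and setup, and is only released here after the workers are forked.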
2131
Jim Cownie5e8470a2013-09-27 10:38:44 +00002132#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002133 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002135 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002136# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002137 ) {
2138#if USE_ITT_NOTIFY
2139 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2140 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002141 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002142 kmp_uint64 tmp_time = 0;
2143 if ( __itt_get_timestamp_ptr )
2144 tmp_time = __itt_get_timestamp();
2145 // Internal fork - report frame begin
2146 master_th->th.th_frame_time = tmp_time;
2147 if ( __kmp_forkjoin_frames_mode == 3 )
2148 team->t.t_region_time = tmp_time;
2149 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2150#endif /* USE_ITT_NOTIFY */
2151 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2152 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2153 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002154 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2155 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002156 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002157#endif /* USE_ITT_BUILD */
2158
2159 /* now go on and do the work */
2160 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2161 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002162 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2163 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002164
2165#if USE_ITT_BUILD
2166 if ( __itt_stack_caller_create_ptr ) {
2167 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2168 }
2169#endif /* USE_ITT_BUILD */
2170
2171#if OMP_40_ENABLED
2172 if ( ap ) // AC: skip __kmp_internal_fork for the teams construct; let only the master threads execute
2173#endif /* OMP_40_ENABLED */
2174 {
2175 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002176 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2177 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002178 }
2179
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002180 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002181 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2182 return TRUE;
2183 }
2184
2185 /* Invoke microtask for MASTER thread */
2186 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2187 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002188 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002189
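    // After the workers are released at the fork barrier, the master thread executes the
    // microtask itself through team->t.t_invoke; failure to invoke it is fatal.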
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002190 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002191 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2192 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00002193 // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002194 if (! team->t.t_invoke( gtid )) {
2195 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2196 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002197 }
2198 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2199 gtid, team->t.t_id, team->t.t_pkfn ) );
2200 KMP_MB(); /* Flush all pending memory write invalidates. */
2201
2202 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2203
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002204#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002205 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002206 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2207 }
2208#endif
2209
Jim Cownie5e8470a2013-09-27 10:38:44 +00002210 return TRUE;
2211}
2212
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002213#if OMPT_SUPPORT
2214static inline void
2215__kmp_join_restore_state(
2216 kmp_info_t *thread,
2217 kmp_team_t *team)
2218{
2219 // restore state outside the region
2220 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2221 ompt_state_work_serial : ompt_state_work_parallel);
2222}
2223
2224static inline void
2225__kmp_join_ompt(
2226 kmp_info_t *thread,
2227 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002228 ompt_parallel_id_t parallel_id,
2229 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002230{
2231 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
2232 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2233 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002234 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002235 }
2236
2237 __kmp_join_restore_state(thread,team);
2238}
2239#endif
2240
Jim Cownie5e8470a2013-09-27 10:38:44 +00002241void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002242__kmp_join_call(ident_t *loc, int gtid
2243#if OMPT_SUPPORT
2244 , enum fork_context_e fork_context
2245#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002246#if OMP_40_ENABLED
2247 , int exit_teams
2248#endif /* OMP_40_ENABLED */
2249)
2250{
Jonathan Peyton45be4502015-08-11 21:36:41 +00002251 KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002252 kmp_team_t *team;
2253 kmp_team_t *parent_team;
2254 kmp_info_t *master_th;
2255 kmp_root_t *root;
2256 int master_active;
2257 int i;
2258
2259 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2260
2261 /* set up current data */
2262 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002263 root = master_th->th.th_root;
2264 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002265 parent_team = team->t.t_parent;
2266
2267 master_th->th.th_ident = loc;
2268
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002269#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002270 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002271 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2272 }
2273#endif
2274
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002275#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00002276 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2277 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2278 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002279 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2280 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002281 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002282#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002283
2284 if( team->t.t_serialized ) {
2285#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002286 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002287 // We are in teams construct
2288 int level = team->t.t_level;
2289 int tlevel = master_th->th.th_teams_level;
2290 if ( level == tlevel ) {
2291 // AC: we did not increment it earlier at the start of the teams construct,
2292 // so do it here, at the end of the teams construct
2293 team->t.t_level++;
2294 } else if ( level == tlevel + 1 ) {
2295 // AC: we are exiting parallel inside teams, need to increment serialization
2296 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2297 team->t.t_serialized++;
2298 }
2299 }
2300#endif /* OMP_40_ENABLED */
2301 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002302
2303#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002304 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002305 __kmp_join_restore_state(master_th, parent_team);
2306 }
2307#endif
2308
Jim Cownie5e8470a2013-09-27 10:38:44 +00002309 return;
2310 }
2311
2312 master_active = team->t.t_master_active;
2313
2314#if OMP_40_ENABLED
2315 if (!exit_teams)
2316#endif /* OMP_40_ENABLED */
2317 {
2318 // AC: No barrier for internal teams at exit from teams construct.
2319 // But there is a barrier for the external team (league).
2320 __kmp_internal_join( loc, gtid, team );
2321 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002322#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002323 else {
2324 master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
2325 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002326#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002327
Jim Cownie5e8470a2013-09-27 10:38:44 +00002328 KMP_MB();
2329
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002330#if OMPT_SUPPORT
2331 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2332#endif
2333
Jim Cownie5e8470a2013-09-27 10:38:44 +00002334#if USE_ITT_BUILD
2335 if ( __itt_stack_caller_create_ptr ) {
2336 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2337 }
2338
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002339 // Mark end of "parallel" region for VTune.
2340 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002341# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002342 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002343# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002344 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002345 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002346 // only one notification scheme (either "submit" or "forking/joined", not both)
2347 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2348 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2349 0, loc, master_th->th.th_team_nproc, 1 );
2350 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2351 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2352 __kmp_itt_region_joined( gtid );
2353 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354#endif /* USE_ITT_BUILD */
2355
2356#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002357 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002358 !exit_teams &&
2359 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2360 team->t.t_level == master_th->th.th_teams_level + 1 ) {
2361 // AC: We need to leave the team structure intact at the end
2362 // of a parallel region inside the teams construct, so that the same
2363 // (hot) team works for the next parallel region; only adjust the nesting levels
2364
2365 /* Decrement our nested depth level */
2366 team->t.t_level --;
2367 team->t.t_active_level --;
2368 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2369
2370 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002371 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002372 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002373 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002374 kmp_info_t **other_threads = team->t.t_threads;
2375 team->t.t_nproc = new_num;
2376 for ( i = 0; i < old_num; ++i ) {
2377 other_threads[i]->th.th_team_nproc = new_num;
2378 }
2379 // Adjust the states of the unused threads of the team
2380 for ( i = old_num; i < new_num; ++i ) {
2381 // Re-initialize thread's barrier data.
2382 int b;
2383 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002384 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002385 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002386 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002387#if USE_DEBUGGER
2388 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2389#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002390 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002391 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2392 // Synchronize thread's task state
2393 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2394 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002395 }
2396 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002397
2398#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002399 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002400 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002401 }
2402#endif
2403
Jim Cownie5e8470a2013-09-27 10:38:44 +00002404 return;
2405 }
2406#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002407
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002408 /* do cleanup and restore the parent team */
2409 master_th->th.th_info .ds.ds_tid = team->t.t_master_tid;
2410 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2411
2412 master_th->th.th_dispatch =
2413 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002414
2415 /* jc: The following lock has instructions with REL and ACQ semantics,
2416 separating the parallel user code called in this parallel region
2417 from the serial user code called after this function returns.
2418 */
2419 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2420
2421#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002422 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002423#endif /* OMP_40_ENABLED */
2424 {
2425 /* Decrement our nested depth level */
2426 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2427 }
2428 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2429
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002430#if OMPT_SUPPORT && OMPT_TRACE
2431 if(ompt_enabled){
2432 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2433 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2434 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2435 parallel_id, task_info->task_id);
2436 }
2437 task_info->frame.exit_runtime_frame = 0;
2438 task_info->task_id = 0;
2439 }
2440#endif
2441
Jim Cownie5e8470a2013-09-27 10:38:44 +00002442 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2443 0, master_th, team ) );
2444 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002445
Alp Toker98758b02014-03-02 04:12:06 +00002446#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002447 //
2448 // Restore master thread's partition.
2449 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002450 master_th->th.th_first_place = team->t.t_first_place;
2451 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002452#endif /* OMP_40_ENABLED */
2453
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002454 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002455
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002456 if ( root->r.r_active != master_active )
2457 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002458
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002459 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002460
2461 /* This race was fun to find. Make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002462 * region; otherwise assertions may fail occasionally since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002463 * may be reallocated and the hierarchy appears inconsistent. It is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002464 * actually safe to run and won't cause any bugs, but it will cause those
Jim Cownie5e8470a2013-09-27 10:38:44 +00002465 * assertion failures. It's only one deref&assign, so we might as well put this
2466 * in the critical region */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002467 master_th->th.th_team = parent_team;
2468 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2469 master_th->th.th_team_master = parent_team->t.t_threads[0];
2470 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002471
2472 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002473 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002474 parent_team != master_th->th.th_serial_team &&
2475 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002476 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2477 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002478 }
2479
Jim Cownie5e8470a2013-09-27 10:38:44 +00002480 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002481 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2482 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2483 // Remember master's state if we re-use this nested hot team
2484 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002485 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002486 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002487 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002488 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002489 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002490 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002491 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002492 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002493 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002494
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002495 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2496 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2497 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002498
2499 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2500
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002501#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002502 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002503 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002504 }
2505#endif
2506
Jim Cownie5e8470a2013-09-27 10:38:44 +00002507 KMP_MB();
2508 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2509}
2510
2511/* ------------------------------------------------------------------------ */
2512/* ------------------------------------------------------------------------ */
2513
2514/* Check whether we should push an internal control record onto the
2515 serial team stack. If so, do it. */
2516void
2517__kmp_save_internal_controls ( kmp_info_t * thread )
2518{
2519
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002520 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002521 return;
2522 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002523 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002524 int push = 0;
2525
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002526 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002527 push = 1;
2528 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002529 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2530 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002531 push = 1;
2532 }
2533 }
2534 if (push) { /* push a record on the serial team's stack */
2535 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2536
Jim Cownie5e8470a2013-09-27 10:38:44 +00002537 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538
2539 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2540
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002541 control->next = thread->th.th_team->t.t_control_stack_top;
2542 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002543 }
2544 }
2545}
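// The routine above implements a "push at most one ICV snapshot per serial nesting
// level" pattern. Below is a minimal standalone sketch of that pattern; the type and
// function names are illustrative only and are not part of the runtime (malloc stands
// in for __kmp_allocate).
#if 0 // illustrative sketch, never compiled
typedef struct icv_snapshot {
    int serial_nesting_level;        // nesting level the snapshot belongs to
    /* copies of the internal control variables would go here */
    struct icv_snapshot *next;       // simple LIFO stack link
} icv_snapshot_t;

static void
push_if_new_level( icv_snapshot_t **top, int level, icv_snapshot_t const *current )
{
    if ( *top == NULL || (*top)->serial_nesting_level != level ) {
        // Snapshot the current ICVs so they can be restored when this
        // serialized region ends.
        icv_snapshot_t *s = (icv_snapshot_t *) malloc( sizeof( *s ) );
        *s = *current;
        s->serial_nesting_level = level;
        s->next = *top;              // push onto the stack
        *top = s;
    }
}
#endif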
2546
2547/* Changes set_nproc */
2548void
2549__kmp_set_num_threads( int new_nth, int gtid )
2550{
2551 kmp_info_t *thread;
2552 kmp_root_t *root;
2553
2554 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2555 KMP_DEBUG_ASSERT( __kmp_init_serial );
2556
2557 if (new_nth < 1)
2558 new_nth = 1;
2559 else if (new_nth > __kmp_max_nth)
2560 new_nth = __kmp_max_nth;
2561
Jonathan Peyton45be4502015-08-11 21:36:41 +00002562 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002563 thread = __kmp_threads[gtid];
2564
2565 __kmp_save_internal_controls( thread );
2566
2567 set__nproc( thread, new_nth );
2568
2569 //
2570 // If this omp_set_num_threads() call will cause the hot team size to be
2571 // reduced (in the absence of a num_threads clause), then reduce it now,
2572 // rather than waiting for the next parallel region.
2573 //
2574 root = thread->th.th_root;
2575 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002576 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2577#if KMP_NESTED_HOT_TEAMS
2578 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2579#endif
2580 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002581 kmp_team_t *hot_team = root->r.r_hot_team;
2582 int f;
2583
2584 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2585
Jim Cownie5e8470a2013-09-27 10:38:44 +00002586 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002587 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2588 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002589 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2590 // When decreasing team size, threads no longer in the team should unref task team.
2591 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2592 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002593 __kmp_free_thread( hot_team->t.t_threads[f] );
2594 hot_team->t.t_threads[f] = NULL;
2595 }
2596 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002597#if KMP_NESTED_HOT_TEAMS
2598 if( thread->th.th_hot_teams ) {
2599 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2600 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2601 }
2602#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002603
Jim Cownie5e8470a2013-09-27 10:38:44 +00002604 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2605
2606 //
2607 // Update the t_nproc field in the threads that are still active.
2608 //
2609 for( f=0 ; f < new_nth; f++ ) {
2610 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2611 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2612 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613 // Special flag in case of an omp_set_num_threads() call
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002614 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002615 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002616}
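// Hedged usage sketch: the early hot-team trimming above is observable from user code
// roughly as follows (standard OpenMP API only; the example is illustrative, not taken
// from this library's tests).
#if 0 // illustrative sketch, never compiled
#include <omp.h>
void example_shrink( void )
{
    omp_set_num_threads( 8 );
    #pragma omp parallel
    { /* hot team grows (up to 8 threads) */ }

    // With no parallel region active, the call below releases the extra
    // hot-team workers immediately instead of waiting for the next region.
    omp_set_num_threads( 2 );
}
#endif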
2617
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618/* Changes max_active_levels */
2619void
2620__kmp_set_max_active_levels( int gtid, int max_active_levels )
2621{
2622 kmp_info_t *thread;
2623
2624 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2625 KMP_DEBUG_ASSERT( __kmp_init_serial );
2626
2627 // validate max_active_levels
2628 if( max_active_levels < 0 ) {
2629 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2630 // We ignore this call if the user has specified a negative value.
2631 // The current setting won't be changed. The last valid setting will be used.
2632 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2633 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2634 return;
2635 }
2636 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2637 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2638 // We allow a zero value. (implementation defined behavior)
2639 } else {
2640 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2641 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2642 // Current upper limit is MAX_INT. (implementation defined behavior)
2643 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
2644 // Actually, the flow should never get here as long as the upper limit is MAX_INT.
2645 }
2646 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2647
2648 thread = __kmp_threads[ gtid ];
2649
2650 __kmp_save_internal_controls( thread );
2651
2652 set__max_active_levels( thread, max_active_levels );
2653
2654}
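// Hedged usage sketch of the validation rules above: a negative value is ignored
// (with a warning) and the last valid setting is kept, while an oversized value is
// clamped to KMP_MAX_ACTIVE_LEVELS_LIMIT. Standard OpenMP API only; the assertion
// just restates the comments above.
#if 0 // illustrative sketch, never compiled
#include <omp.h>
#include <assert.h>
void example_max_active_levels( void )
{
    omp_set_max_active_levels( 4 );
    omp_set_max_active_levels( -1 );             // ignored, warning issued
    assert( omp_get_max_active_levels() == 4 );  // last valid setting kept
}
#endif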
2655
2656/* Gets max_active_levels */
2657int
2658__kmp_get_max_active_levels( int gtid )
2659{
2660 kmp_info_t *thread;
2661
2662 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2663 KMP_DEBUG_ASSERT( __kmp_init_serial );
2664
2665 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002666 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002667 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002668 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2669 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002670}
2671
2672/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2673void
2674__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2675{
2676 kmp_info_t *thread;
2677// kmp_team_t *team;
2678
2679 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2680 KMP_DEBUG_ASSERT( __kmp_init_serial );
2681
2682 // Check if the kind parameter is valid, correct if needed.
2683 // Valid parameters should fit in one of two intervals - standard or extended:
2684 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2685 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2686 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2687 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2688 {
2689 // TODO: Hint needs attention in case we change the default schedule.
2690 __kmp_msg(
2691 kmp_ms_warning,
2692 KMP_MSG( ScheduleKindOutOfRange, kind ),
2693 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2694 __kmp_msg_null
2695 );
2696 kind = kmp_sched_default;
2697 chunk = 0; // ignore chunk value in case of bad kind
2698 }
2699
2700 thread = __kmp_threads[ gtid ];
2701
2702 __kmp_save_internal_controls( thread );
2703
2704 if ( kind < kmp_sched_upper_std ) {
2705 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2706 // differentiate static chunked vs. unchunked:
2707 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002708 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002709 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002710 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002711 }
2712 } else {
2713 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002714 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002715 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2716 }
2717 if ( kind == kmp_sched_auto ) {
2718 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002719 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002721 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002722 }
2723}
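// Hedged usage sketch of the kind/chunk handling above: a chunk below the default
// (e.g. 0) with a static kind selects the unchunked static schedule, and the chunk
// argument is ignored for the auto kind. Standard OpenMP API only.
#if 0 // illustrative sketch, never compiled
#include <omp.h>
void example_set_schedule( void )
{
    omp_set_schedule( omp_sched_static, 0 );     // chunk below default => unchunked static
    omp_set_schedule( omp_sched_dynamic, 16 );   // dynamic with chunk 16
    omp_set_schedule( omp_sched_auto, 100 );     // chunk ignored for auto
}
#endif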
2724
2725/* Gets def_sched_var ICV values */
2726void
2727__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2728{
2729 kmp_info_t *thread;
2730 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731
2732 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2733 KMP_DEBUG_ASSERT( __kmp_init_serial );
2734
2735 thread = __kmp_threads[ gtid ];
2736
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002737 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002738
2739 switch ( th_type ) {
2740 case kmp_sch_static:
2741 case kmp_sch_static_greedy:
2742 case kmp_sch_static_balanced:
2743 *kind = kmp_sched_static;
2744 *chunk = 0; // chunk was not set; indicate this fact via a zero value
2745 return;
2746 case kmp_sch_static_chunked:
2747 *kind = kmp_sched_static;
2748 break;
2749 case kmp_sch_dynamic_chunked:
2750 *kind = kmp_sched_dynamic;
2751 break;
2752 case kmp_sch_guided_chunked:
2753 case kmp_sch_guided_iterative_chunked:
2754 case kmp_sch_guided_analytical_chunked:
2755 *kind = kmp_sched_guided;
2756 break;
2757 case kmp_sch_auto:
2758 *kind = kmp_sched_auto;
2759 break;
2760 case kmp_sch_trapezoidal:
2761 *kind = kmp_sched_trapezoidal;
2762 break;
2763/*
2764 case kmp_sch_static_steal:
2765 *kind = kmp_sched_static_steal;
2766 break;
2767*/
2768 default:
2769 KMP_FATAL( UnknownSchedulingType, th_type );
2770 }
2771
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002772 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002773}
2774
2775int
2776__kmp_get_ancestor_thread_num( int gtid, int level ) {
2777
2778 int ii, dd;
2779 kmp_team_t *team;
2780 kmp_info_t *thr;
2781
2782 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2783 KMP_DEBUG_ASSERT( __kmp_init_serial );
2784
2785 // validate level
2786 if( level == 0 ) return 0;
2787 if( level < 0 ) return -1;
2788 thr = __kmp_threads[ gtid ];
2789 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002790 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002791 if( level > ii ) return -1;
2792
2793#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002794 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002795 // AC: we are in teams region where multiple nested teams have same level
2796 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2797 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2798 KMP_DEBUG_ASSERT( ii >= tlevel );
2799 // AC: As we need to pass by the teams league, we need to artificially increase ii
2800 if ( ii == tlevel ) {
2801 ii += 2; // three teams have same level
2802 } else {
2803 ii ++; // two teams have same level
2804 }
2805 }
2806 }
2807#endif
2808
2809 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2810
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002811 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002812 level++;
2813 while( ii > level )
2814 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002815 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002816 {
2817 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002818 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002819 team = team->t.t_parent;
2820 continue;
2821 }
2822 if( ii > level ) {
2823 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002824 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002825 ii--;
2826 }
2827 }
2828
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002829 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002830}
2831
2832int
2833__kmp_get_team_size( int gtid, int level ) {
2834
2835 int ii, dd;
2836 kmp_team_t *team;
2837 kmp_info_t *thr;
2838
2839 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2840 KMP_DEBUG_ASSERT( __kmp_init_serial );
2841
2842 // validate level
2843 if( level == 0 ) return 1;
2844 if( level < 0 ) return -1;
2845 thr = __kmp_threads[ gtid ];
2846 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002847 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002848 if( level > ii ) return -1;
2849
2850#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002851 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002852 // AC: we are in teams region where multiple nested teams have same level
2853 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2854 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2855 KMP_DEBUG_ASSERT( ii >= tlevel );
2856 // AC: As we need to pass by the teams league, we need to artificially increase ii
2857 if ( ii == tlevel ) {
2858 ii += 2; // three teams have same level
2859 } else {
2860 ii ++; // two teams have same level
2861 }
2862 }
2863 }
2864#endif
2865
2866 while( ii > level )
2867 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002868 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002869 {
2870 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002871 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002872 team = team->t.t_parent;
2873 continue;
2874 }
2875 if( ii > level ) {
2876 team = team->t.t_parent;
2877 ii--;
2878 }
2879 }
2880
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002881 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002882}
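// Hedged usage sketch for the two queries above: with two nested parallel regions,
// omp_get_ancestor_thread_num() and omp_get_team_size() walk the team hierarchy much
// as the loops over t_parent/t_serialized do. Standard OpenMP API only; actual team
// sizes depend on nesting settings and available resources.
#if 0 // illustrative sketch, never compiled
#include <omp.h>
void example_ancestors( void )
{
    #pragma omp parallel num_threads(2)
    #pragma omp parallel num_threads(3)
    {
        int lvl   = omp_get_level();                     // 2 (both regions counted)
        int self  = omp_get_ancestor_thread_num( lvl );  // same as omp_get_thread_num()
        int outer = omp_get_ancestor_thread_num( 1 );    // id within the outer team: 0 or 1
        int size1 = omp_get_team_size( 1 );              // outer team size (2 if not serialized)
        int size2 = omp_get_team_size( lvl );            // inner team size (up to 3)
    }
}
#endif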
2883
Jim Cownie5e8470a2013-09-27 10:38:44 +00002884kmp_r_sched_t
2885__kmp_get_schedule_global() {
2886// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
2887// may be changed by kmp_set_defaults independently, so the updated schedule can be obtained here.
2888
2889 kmp_r_sched_t r_sched;
2890
2891 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2892 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2893 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2894 if ( __kmp_sched == kmp_sch_static ) {
2895 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2896 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2897 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2898 } else {
2899 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2900 }
2901
2902 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2903 r_sched.chunk = KMP_DEFAULT_CHUNK;
2904 } else {
2905 r_sched.chunk = __kmp_chunk;
2906 }
2907
2908 return r_sched;
2909}
2910
2911/* ------------------------------------------------------------------------ */
2912/* ------------------------------------------------------------------------ */
2913
2914
2915/*
2916 * Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2917 * at least argc number of *t_argv entries for the requested team.
2918 */
2919static void
2920__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2921{
2922
2923 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002924 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002925
2926 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2927 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002928 /* if previously allocated heap space for args, free them */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002929 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2930 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002931
2932 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2933 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002934 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002935 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2936 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002937 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002938 if ( __kmp_storage_map ) {
2939 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2940 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2941 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2942 "team_%d.t_inline_argv",
2943 team->t.t_id );
2944 }
2945 } else {
2946 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002947 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002948 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2949 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2950 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002951 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002952 if ( __kmp_storage_map ) {
2953 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2954 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2955 team->t.t_id );
2956 }
2957 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002958 }
2959}
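// The allocation policy above is a small-buffer optimization: small argument lists
// reuse inline storage in the team structure, larger ones get a heap block sized with
// headroom. A standalone sketch of the same policy follows; the names and sizes are
// illustrative, not the runtime's (malloc stands in for __kmp_page_allocate).
#if 0 // illustrative sketch, never compiled
#define SKETCH_INLINE_ENTRIES     4   /* stands in for KMP_INLINE_ARGV_ENTRIES     */
#define SKETCH_MIN_MALLOC_ENTRIES 16  /* stands in for KMP_MIN_MALLOC_ARGV_ENTRIES */
typedef struct {
    void  *inline_argv[ SKETCH_INLINE_ENTRIES ];
    void **argv;
    int    max_argc;
} sketch_args_t;

static void
sketch_reserve_argv( sketch_args_t *a, int argc )
{
    if ( argc <= SKETCH_INLINE_ENTRIES ) {
        a->argv     = &a->inline_argv[0];    // reuse space already in the structure
        a->max_argc = SKETCH_INLINE_ENTRIES;
    } else {
        // Allocate at least the minimum, otherwise double the request to
        // reduce the number of future reallocations (mirrors the code above).
        a->max_argc = ( argc <= (SKETCH_MIN_MALLOC_ENTRIES >> 1) ) ?
                      SKETCH_MIN_MALLOC_ENTRIES : 2 * argc;
        a->argv     = (void **) malloc( sizeof(void *) * a->max_argc );
    }
}
#endif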
2960
2961static void
2962__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2963{
2964 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002965 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002966 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2967 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002968 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002969 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002970 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002971 team->t.t_max_nproc = max_nth;
2972
2973 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002974 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002975 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002976#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00002977 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2978#endif
2979 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002980}
2981
2982static void
2983__kmp_free_team_arrays(kmp_team_t *team) {
2984 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2985 int i;
2986 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2987 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2988 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2989 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2990 }; // if
2991 }; // for
2992 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00002993 __kmp_free(team->t.t_disp_buffer);
2994 __kmp_free(team->t.t_dispatch);
2995 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002996 team->t.t_threads = NULL;
2997 team->t.t_disp_buffer = NULL;
2998 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002999 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003000}
3001
3002static void
3003__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3004 kmp_info_t **oldThreads = team->t.t_threads;
3005
Jonathan Peytona58563d2016-03-29 20:05:27 +00003006 __kmp_free(team->t.t_disp_buffer);
3007 __kmp_free(team->t.t_dispatch);
3008 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003009 __kmp_allocate_team_arrays(team, max_nth);
3010
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003011 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003012
3013 __kmp_free(oldThreads);
3014}
3015
3016static kmp_internal_control_t
3017__kmp_get_global_icvs( void ) {
3018
Jim Cownie5e8470a2013-09-27 10:38:44 +00003019 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003020
3021#if OMP_40_ENABLED
3022 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3023#endif /* OMP_40_ENABLED */
3024
3025 kmp_internal_control_t g_icvs = {
3026 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003027 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3028 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3029 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003030 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
3031 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003032 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3033 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003034 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3035 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003036#if OMP_40_ENABLED
3037 __kmp_nested_proc_bind.bind_types[0],
3038#endif /* OMP_40_ENABLED */
3039 NULL //struct kmp_internal_control *next;
3040 };
3041
3042 return g_icvs;
3043}
3044
3045static kmp_internal_control_t
3046__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3047
Jim Cownie5e8470a2013-09-27 10:38:44 +00003048 kmp_internal_control_t gx_icvs;
3049 gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in __kmp_save_internal_controls
3050 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3051 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003052
3053 return gx_icvs;
3054}
3055
3056static void
3057__kmp_initialize_root( kmp_root_t *root )
3058{
3059 int f;
3060 kmp_team_t *root_team;
3061 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003062 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003063 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3064 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003065 KMP_DEBUG_ASSERT( root );
3066 KMP_ASSERT( ! root->r.r_begin );
3067
3068 /* setup the root state structure */
3069 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003070 root->r.r_begin = FALSE;
3071 root->r.r_active = FALSE;
3072 root->r.r_in_parallel = 0;
3073 root->r.r_blocktime = __kmp_dflt_blocktime;
3074 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003075
3076 /* setup the root team for this task */
3077 /* allocate the root team structure */
3078 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003079
Jim Cownie5e8470a2013-09-27 10:38:44 +00003080 root_team =
3081 __kmp_allocate_team(
3082 root,
3083 1, // new_nproc
3084 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003085#if OMPT_SUPPORT
3086 0, // root parallel id
3087#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003088#if OMP_40_ENABLED
3089 __kmp_nested_proc_bind.bind_types[0],
3090#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003091 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003092 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003093 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003094 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003095#if USE_DEBUGGER
3096 // Non-NULL value should be assigned to make the debugger display the root team.
3097 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3098#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003099
3100 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3101
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003102 root->r.r_root_team = root_team;
3103 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003104
3105 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003106 root_team->t.t_threads[0] = NULL;
3107 root_team->t.t_nproc = 1;
3108 root_team->t.t_serialized = 1;
3109 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3110 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3111 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003112 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3113 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3114
3115 /* setup the hot team for this task */
3116 /* allocate the hot team structure */
3117 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003118
Jim Cownie5e8470a2013-09-27 10:38:44 +00003119 hot_team =
3120 __kmp_allocate_team(
3121 root,
3122 1, // new_nproc
3123 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003124#if OMPT_SUPPORT
3125 0, // root parallel id
3126#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003127#if OMP_40_ENABLED
3128 __kmp_nested_proc_bind.bind_types[0],
3129#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003130 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003131 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003132 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003133 );
3134 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3135
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003136 root->r.r_hot_team = hot_team;
3137 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003138
3139 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003140 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003141
3142 /* initialize hot team */
3143 hot_team_max_nth = hot_team->t.t_max_nproc;
3144 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003145 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003146 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003147 hot_team->t.t_nproc = 1;
3148 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3149 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3150 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003151 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003152}
3153
3154#ifdef KMP_DEBUG
3155
3156
3157typedef struct kmp_team_list_item {
3158 kmp_team_p const * entry;
3159 struct kmp_team_list_item * next;
3160} kmp_team_list_item_t;
3161typedef kmp_team_list_item_t * kmp_team_list_t;
3162
3163
3164static void
3165__kmp_print_structure_team_accum( // Add team to list of teams.
3166 kmp_team_list_t list, // List of teams.
3167 kmp_team_p const * team // Team to add.
3168) {
3169
3170 // List must terminate with item where both entry and next are NULL.
3171 // Team is added to the list only once.
3172 // List is sorted in ascending order by team id.
3173 // Team id is *not* a key.
3174
3175 kmp_team_list_t l;
3176
3177 KMP_DEBUG_ASSERT( list != NULL );
3178 if ( team == NULL ) {
3179 return;
3180 }; // if
3181
3182 __kmp_print_structure_team_accum( list, team->t.t_parent );
3183 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3184
3185 // Search list for the team.
3186 l = list;
3187 while ( l->next != NULL && l->entry != team ) {
3188 l = l->next;
3189 }; // while
3190 if ( l->next != NULL ) {
3191 return; // Team has been added before, exit.
3192 }; // if
3193
3194 // Team is not found. Search list again for insertion point.
3195 l = list;
3196 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3197 l = l->next;
3198 }; // while
3199
3200 // Insert team.
3201 {
3202 kmp_team_list_item_t * item =
3203 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3204 * item = * l;
3205 l->entry = team;
3206 l->next = item;
3207 }
3208
3209}
3210
3211static void
3212__kmp_print_structure_team(
3213 char const * title,
3214 kmp_team_p const * team
3215
3216) {
3217 __kmp_printf( "%s", title );
3218 if ( team != NULL ) {
3219 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3220 } else {
3221 __kmp_printf( " - (nil)\n" );
3222 }; // if
3223}
3224
3225static void
3226__kmp_print_structure_thread(
3227 char const * title,
3228 kmp_info_p const * thread
3229
3230) {
3231 __kmp_printf( "%s", title );
3232 if ( thread != NULL ) {
3233 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3234 } else {
3235 __kmp_printf( " - (nil)\n" );
3236 }; // if
3237}
3238
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003239void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003240__kmp_print_structure(
3241 void
3242) {
3243
3244 kmp_team_list_t list;
3245
3246 // Initialize list of teams.
3247 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3248 list->entry = NULL;
3249 list->next = NULL;
3250
3251 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3252 {
3253 int gtid;
3254 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3255 __kmp_printf( "%2d", gtid );
3256 if ( __kmp_threads != NULL ) {
3257 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3258 }; // if
3259 if ( __kmp_root != NULL ) {
3260 __kmp_printf( " %p", __kmp_root[ gtid ] );
3261 }; // if
3262 __kmp_printf( "\n" );
3263 }; // for gtid
3264 }
3265
3266 // Print out __kmp_threads array.
3267 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3268 if ( __kmp_threads != NULL ) {
3269 int gtid;
3270 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3271 kmp_info_t const * thread = __kmp_threads[ gtid ];
3272 if ( thread != NULL ) {
3273 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3274 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3275 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3276 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3277 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3278 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3279 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3280 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3281#if OMP_40_ENABLED
3282 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3283#endif
3284 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3285 __kmp_printf( "\n" );
3286 __kmp_print_structure_team_accum( list, thread->th.th_team );
3287 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3288 }; // if
3289 }; // for gtid
3290 } else {
3291 __kmp_printf( "Threads array is not allocated.\n" );
3292 }; // if
3293
3294 // Print out __kmp_root array.
3295 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3296 if ( __kmp_root != NULL ) {
3297 int gtid;
3298 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3299 kmp_root_t const * root = __kmp_root[ gtid ];
3300 if ( root != NULL ) {
3301 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3302 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3303 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3304 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3305 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3306 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3307 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3308 __kmp_printf( "\n" );
3309 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3310 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3311 }; // if
3312 }; // for gtid
3313 } else {
3314 __kmp_printf( "Ubers array is not allocated.\n" );
3315 }; // if
3316
3317 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3318 while ( list->next != NULL ) {
3319 kmp_team_p const * team = list->entry;
3320 int i;
3321 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3322 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3323 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3324 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3325 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3326 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3327 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3328 __kmp_printf( " Thread %2d: ", i );
3329 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3330 }; // for i
3331 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3332 __kmp_printf( "\n" );
3333 list = list->next;
3334 }; // while
3335
3336 // Print out __kmp_thread_pool and __kmp_team_pool.
3337 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3338 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3339 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3340 __kmp_printf( "\n" );
3341
3342 // Free team list.
3343 while ( list != NULL ) {
3344 kmp_team_list_item_t * item = list;
3345 list = list->next;
3346 KMP_INTERNAL_FREE( item );
3347 }; // while
3348
3349}
3350
3351#endif
3352
3353
3354//---------------------------------------------------------------------------
3355// Stuff for per-thread fast random number generator
3356// Table of primes
3357
3358static const unsigned __kmp_primes[] = {
3359 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3360 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3361 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3362 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3363 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3364 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3365 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3366 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3367 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3368 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3369 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3370 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3371 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3372 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3373 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3374 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3375};
3376
3377//---------------------------------------------------------------------------
3378// __kmp_get_random: Get a random number using a linear congruential method.
3379
3380unsigned short
3381__kmp_get_random( kmp_info_t * thread )
3382{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003383 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003384 unsigned short r = x>>16;
3385
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003386 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003387
3388 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3389 thread->th.th_info.ds.ds_tid, r) );
3390
3391 return r;
3392}
3393//--------------------------------------------------------
3394// __kmp_init_random: Initialize a random number generator
3395
3396void
3397__kmp_init_random( kmp_info_t * thread )
3398{
3399 unsigned seed = thread->th.th_info.ds.ds_tid;
3400
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003401 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3402 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3403 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003404}
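// The generator above is a per-thread linear congruential generator: each thread gets
// its own multiplier from __kmp_primes and advances x <- a*x + 1 (mod 2^32), returning
// the upper 16 bits (the low bits of such an LCG have short periods). A standalone
// model of the same recurrence follows; the names are illustrative only.
#if 0 // illustrative sketch, never compiled
static unsigned sketch_x, sketch_a;

static void
sketch_seed( unsigned tid )
{
    sketch_a = __kmp_primes[ tid % ( sizeof(__kmp_primes) / sizeof(__kmp_primes[0]) ) ];
    sketch_x = ( tid + 1 ) * sketch_a + 1;
}

static unsigned short
sketch_next( void )
{
    unsigned short r = (unsigned short)( sketch_x >> 16 );  // take the high 16 bits
    sketch_x = sketch_x * sketch_a + 1;                      // x_{n+1} = a * x_n + 1
    return r;
}
#endif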
3405
3406
3407#if KMP_OS_WINDOWS
3408/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3409static int
3410__kmp_reclaim_dead_roots(void) {
3411 int i, r = 0;
3412
3413 for(i = 0; i < __kmp_threads_capacity; ++i) {
3414 if( KMP_UBER_GTID( i ) &&
3415 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3416 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
3417 r += __kmp_unregister_root_other_thread(i);
3418 }
3419 }
3420 return r;
3421}
3422#endif
3423
3424/*
3425 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3426 free entries generated.
3427
3428 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3429 already dead.
3430
3431 On all platforms, expansion is attempted on the arrays __kmp_threads and __kmp_root, with appropriate
3432 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3433 __kmp_tp_capacity, if threadprivate cache array has been created.
3434 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3435
3436 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3437 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3438 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3439 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3440 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3441 as many free slots as possible up to nWish.
3442
3443 If any argument is negative, the behavior is undefined.
3444*/
3445static int
3446__kmp_expand_threads(int nWish, int nNeed) {
3447 int added = 0;
3448 int old_tp_cached;
3449 int __kmp_actual_max_nth;
3450
3451 if(nNeed > nWish) /* normalize the arguments */
3452 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003453#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003454/* only for Windows static library */
3455 /* reclaim array entries for root threads that are already dead */
3456 added = __kmp_reclaim_dead_roots();
3457
3458 if(nNeed) {
3459 nNeed -= added;
3460 if(nNeed < 0)
3461 nNeed = 0;
3462 }
3463 if(nWish) {
3464 nWish -= added;
3465 if(nWish < 0)
3466 nWish = 0;
3467 }
3468#endif
3469 if(nWish <= 0)
3470 return added;
3471
3472 while(1) {
3473 int nTarget;
3474 int minimumRequiredCapacity;
3475 int newCapacity;
3476 kmp_info_t **newThreads;
3477 kmp_root_t **newRoot;
3478
3479 //
3480 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3481 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3482 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3483 // become > __kmp_max_nth in one of two ways:
3484 //
3485 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3486 // may not be reused by another thread, so we may need to increase
3487 // __kmp_threads_capacity to __kmp_max_threads + 1.
3488 //
3489 // 2) New foreign root(s) are encountered. We always register new
3490 // foreign roots. This may cause a smaller # of threads to be
3491 // allocated at subsequent parallel regions, but the worker threads
3492 // hang around (and eventually go to sleep) and need slots in the
3493 // __kmp_threads[] array.
3494 //
3495 // Anyway, that is the reason for moving the check to see if
3496 // __kmp_max_threads was exceeded into __kmp_reserve_threads()
3497 // instead of having it performed here. -BB
3498 //
3499 old_tp_cached = __kmp_tp_cached;
3500 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3501 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3502
3503 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3504 nTarget = nWish;
3505 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3506 /* can't fulfil nWish, so try nNeed */
3507 if(nNeed) {
3508 nTarget = nNeed;
3509 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3510 /* possible expansion too small -- give up */
3511 break;
3512 }
3513 } else {
3514 /* best-effort */
3515 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3516 if(!nTarget) {
3517 /* can't expand at all -- give up */
3518 break;
3519 }
3520 }
3521 }
3522 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3523
3524 newCapacity = __kmp_threads_capacity;
3525 do{
3526 newCapacity =
3527 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3528 (newCapacity << 1) :
3529 __kmp_actual_max_nth;
3530 } while(newCapacity < minimumRequiredCapacity);
3531 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3532 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003533 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3534 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003535 memset(newThreads + __kmp_threads_capacity, 0,
3536 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3537 memset(newRoot + __kmp_threads_capacity, 0,
3538 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3539
3540 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3541 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3542 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3543 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3544 of a double-check pair.
3545 */
3546 __kmp_free(newThreads);
3547 continue; /* start over and try again */
3548 }
3549 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3550 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3551 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3552 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3553 __kmp_free(newThreads);
3554 continue; /* start over and try again */
3555 } else {
3556 /* success */
3557 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Needs to be investigated.
3558 //
3559 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3560 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3561 added += newCapacity - __kmp_threads_capacity;
3562 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3563 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003564 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003565 }
3566 }
3567 return added;
3568}
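// The capacity computation inside __kmp_expand_threads above doubles the current
// capacity until the target is reached, clipping at the allowed maximum. A standalone
// sketch of that growth rule follows (the caller must already have checked that the
// target fits, as the headroom test above does); the name is illustrative only.
#if 0 // illustrative sketch, never compiled
static int
sketch_grow_capacity( int current, int required, int maximum )
{
    int cap = current;
    do {
        // Double while that still fits under the maximum, otherwise jump to the maximum.
        cap = ( cap <= (maximum >> 1) ) ? ( cap << 1 ) : maximum;
    } while ( cap < required );   // required <= maximum is guaranteed by the caller
    return cap;
}
#endif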
3569
3570/* register the current thread as a root thread and obtain our gtid */
3571/* we must have the __kmp_initz_lock held at this point */
3572/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
3573int
3574__kmp_register_root( int initial_thread )
3575{
3576 kmp_info_t *root_thread;
3577 kmp_root_t *root;
3578 int gtid;
3579 int capacity;
3580 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3581 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3582 KMP_MB();
3583
3584
3585 /*
3586 2007-03-02:
3587
3588 If initial thread did not invoke OpenMP RTL yet, and this thread is not an initial one,
3589 "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
3590 return false (that means there is at least one empty slot in __kmp_threads array), but it
3591 is possible the only free slot is #0, which is reserved for initial thread and so cannot be
3592 used for this one. The following code works around this bug.
3593
3594 However, the right solution seems to be not reserving slot #0 for the initial thread, because:
3595 (1) there is no magic in slot #0,
3596 (2) we cannot detect initial thread reliably (the first thread which does serial
3597 initialization may not be a real initial thread).
3598 */
3599 capacity = __kmp_threads_capacity;
3600 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3601 -- capacity;
3602 }; // if
3603
3604 /* see if there are too many threads */
3605 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3606 if ( __kmp_tp_cached ) {
3607 __kmp_msg(
3608 kmp_ms_fatal,
3609 KMP_MSG( CantRegisterNewThread ),
3610 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3611 KMP_HNT( PossibleSystemLimitOnThreads ),
3612 __kmp_msg_null
3613 );
3614 }
3615 else {
3616 __kmp_msg(
3617 kmp_ms_fatal,
3618 KMP_MSG( CantRegisterNewThread ),
3619 KMP_HNT( SystemLimitOnThreads ),
3620 __kmp_msg_null
3621 );
3622 }
3623 }; // if
3624
3625 /* find an available thread slot */
3626 /* Don't reassign the zero slot since we need that to only be used by initial
3627 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003628 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3629 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003630 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3631 KMP_ASSERT( gtid < __kmp_threads_capacity );
3632
3633 /* update global accounting */
3634 __kmp_all_nth ++;
3635 TCW_4(__kmp_nth, __kmp_nth + 1);
3636
3637 //
3638 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3639 // for low numbers of procs, and method #2 (keyed API call) for higher
3640 // numbers of procs.
3641 //
3642 if ( __kmp_adjust_gtid_mode ) {
3643 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3644 if ( TCR_4(__kmp_gtid_mode) != 2) {
3645 TCW_4(__kmp_gtid_mode, 2);
3646 }
3647 }
3648 else {
3649 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3650 TCW_4(__kmp_gtid_mode, 1);
3651 }
3652 }
3653 }
3654
3655#ifdef KMP_ADJUST_BLOCKTIME
3656 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003657 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003658 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3659 if ( __kmp_nth > __kmp_avail_proc ) {
3660 __kmp_zero_bt = TRUE;
3661 }
3662 }
3663#endif /* KMP_ADJUST_BLOCKTIME */
3664
3665 /* setup this new hierarchy */
3666 if( ! ( root = __kmp_root[gtid] )) {
3667 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3668 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3669 }
3670
3671 __kmp_initialize_root( root );
3672
3673 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003674 if( root->r.r_uber_thread ) {
3675 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003676 } else {
3677 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3678 if ( __kmp_storage_map ) {
3679 __kmp_print_thread_storage_map( root_thread, gtid );
3680 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003681 root_thread->th.th_info .ds.ds_gtid = gtid;
3682 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003683 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003684 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003685 }
3686 #if USE_FAST_MEMORY
3687 __kmp_initialize_fast_memory( root_thread );
3688 #endif /* USE_FAST_MEMORY */
3689
3690 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003691 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003692 __kmp_initialize_bget( root_thread );
3693 #endif
3694 __kmp_init_random( root_thread ); // Initialize random number generator
3695 }
3696
3697 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003698 if( ! root_thread->th.th_serial_team ) {
3699 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003700 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003701
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003702 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003703#if OMPT_SUPPORT
3704 0, // root parallel id
3705#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003706#if OMP_40_ENABLED
3707 proc_bind_default,
3708#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003709 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003710 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003712 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003713 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003714 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003715
3716 /* drop root_thread into place */
3717 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3718
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003719 root->r.r_root_team->t.t_threads[0] = root_thread;
3720 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3721 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3722 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
3723 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003724
3725 /* initialize the thread, get it ready to go */
3726 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003727 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003728
3729 /* prepare the master thread for get_gtid() */
3730 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003731
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003732#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003733 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003734#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003735
Jim Cownie5e8470a2013-09-27 10:38:44 +00003736 #ifdef KMP_TDATA_GTID
3737 __kmp_gtid = gtid;
3738 #endif
3739 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3740 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003741
3742 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3743 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003744 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003745 KMP_INIT_BARRIER_STATE ) );
3746 { // Initialize barrier data.
3747 int b;
3748 for ( b = 0; b < bs_last_barrier; ++ b ) {
3749 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003750#if USE_DEBUGGER
3751 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3752#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003753 }; // for
3754 }
3755 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3756
Alp Toker763b9392014-02-28 09:42:41 +00003757#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003758# if OMP_40_ENABLED
3759 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3760 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3761 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3762 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3763# endif
3764
Jim Cownie5e8470a2013-09-27 10:38:44 +00003765 if ( TCR_4(__kmp_init_middle) ) {
3766 __kmp_affinity_set_init_mask( gtid, TRUE );
3767 }
Alp Toker763b9392014-02-28 09:42:41 +00003768#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003769
3770 __kmp_root_counter ++;
3771
3772 KMP_MB();
3773 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3774
3775 return gtid;
3776}
3777
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003778#if KMP_NESTED_HOT_TEAMS
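// Recursively free the nested hot teams kept by 'thr' at the given level and below.
// Returns the number of threads freed (each team's master thread is not freed here).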
3779static int
3780__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3781{
3782 int i, n, nth;
3783 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3784 if( !hot_teams || !hot_teams[level].hot_team ) {
3785 return 0;
3786 }
3787 KMP_DEBUG_ASSERT( level < max_level );
3788 kmp_team_t *team = hot_teams[level].hot_team;
3789 nth = hot_teams[level].hot_team_nth;
3790 n = nth - 1; // master is not freed
3791 if( level < max_level - 1 ) {
3792 for( i = 0; i < nth; ++i ) {
3793 kmp_info_t *th = team->t.t_threads[i];
3794 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3795 if( i > 0 && th->th.th_hot_teams ) {
3796 __kmp_free( th->th.th_hot_teams );
3797 th->th.th_hot_teams = NULL;
3798 }
3799 }
3800 }
3801 __kmp_free_team( root, team, NULL );
3802 return n;
3803}
3804#endif
3805
Jim Cownie5e8470a2013-09-27 10:38:44 +00003806/* Resets a root thread and clears its root and hot teams.
3807 Returns the number of __kmp_threads entries directly and indirectly freed.
3808*/
3809static int
3810__kmp_reset_root(int gtid, kmp_root_t *root)
3811{
3812 kmp_team_t * root_team = root->r.r_root_team;
3813 kmp_team_t * hot_team = root->r.r_hot_team;
3814 int n = hot_team->t.t_nproc;
3815 int i;
3816
3817 KMP_DEBUG_ASSERT( ! root->r.r_active );
3818
3819 root->r.r_root_team = NULL;
3820 root->r.r_hot_team = NULL;
3821 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before calling
3822 // __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003823 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3824#if KMP_NESTED_HOT_TEAMS
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003825 if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003826 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3827 kmp_info_t *th = hot_team->t.t_threads[i];
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003828 if( __kmp_hot_teams_max_level > 1 ) {
3829 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3830 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003831 if( th->th.th_hot_teams ) {
3832 __kmp_free( th->th.th_hot_teams );
3833 th->th.th_hot_teams = NULL;
3834 }
3835 }
3836 }
3837#endif
3838 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003839
Jim Cownie5e8470a2013-09-27 10:38:44 +00003840 //
3841 // Before we can reap the thread, we need to make certain that all
3842 // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
3843 //
3844 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3845 __kmp_wait_to_unref_task_teams();
3846 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003847
3848 #if KMP_OS_WINDOWS
3849 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3850 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3851 (LPVOID)&(root->r.r_uber_thread->th),
3852 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3853 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3854 #endif /* KMP_OS_WINDOWS */
3855
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003856#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003857 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003858 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3859 int gtid = __kmp_get_gtid();
3860 __ompt_thread_end(ompt_thread_initial, gtid);
3861 }
3862#endif
3863
Jim Cownie5e8470a2013-09-27 10:38:44 +00003864 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3865 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3866
3867 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3868 root->r.r_uber_thread = NULL;
3869 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003870 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003871
3872 return n;
3873}
3874
3875void
3876__kmp_unregister_root_current_thread( int gtid )
3877{
Jim Cownie77c2a632014-09-03 11:34:33 +00003878 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003879 /* this lock should be ok, since unregister_root_current_thread is never called during
3880 * an abort, only during a normal close. furthermore, if you have the
3881 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003882
3883 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3884 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3885 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3886 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3887 return;
3888 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003889 kmp_root_t *root = __kmp_root[gtid];
3890
Jim Cownie5e8470a2013-09-27 10:38:44 +00003891 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3892 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3893 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3894 KMP_ASSERT( root->r.r_active == FALSE );
3895
Jim Cownie5e8470a2013-09-27 10:38:44 +00003896
3897 KMP_MB();
3898
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003899#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003900 kmp_info_t * thread = __kmp_threads[gtid];
3901 kmp_team_t * team = thread->th.th_team;
3902 kmp_task_team_t * task_team = thread->th.th_task_team;
3903
3904 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003905 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3906#if OMPT_SUPPORT
3907 // the runtime is shutting down so we won't report any events
3908 thread->th.ompt_thread_info.state = ompt_state_undefined;
3909#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003910 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003911 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003912#endif
3913
Jim Cownie5e8470a2013-09-27 10:38:44 +00003914 __kmp_reset_root(gtid, root);
3915
3916 /* free up this thread slot */
3917 __kmp_gtid_set_specific( KMP_GTID_DNE );
3918#ifdef KMP_TDATA_GTID
3919 __kmp_gtid = KMP_GTID_DNE;
3920#endif
3921
3922 KMP_MB();
3923 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3924
3925 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3926}
3927
Jonathan Peyton2321d572015-06-08 19:25:25 +00003928#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003929/* __kmp_forkjoin_lock must already be held.
3930 Unregisters a root thread that is not the current thread. Returns the number of
3931 __kmp_threads entries freed as a result.
3932 */
3933static int
3934__kmp_unregister_root_other_thread( int gtid )
3935{
3936 kmp_root_t *root = __kmp_root[gtid];
3937 int r;
3938
3939 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3940 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3941 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3942 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3943 KMP_ASSERT( root->r.r_active == FALSE );
3944
3945 r = __kmp_reset_root(gtid, root);
3946 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3947 return r;
3948}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003949#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003950
Jim Cownie5e8470a2013-09-27 10:38:44 +00003951#if KMP_DEBUG
3952void __kmp_task_info() {
3953
3954 kmp_int32 gtid = __kmp_entry_gtid();
3955 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3956 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003957 kmp_team_t *steam = this_thr->th.th_serial_team;
3958 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003959
3960 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3961 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3962}
3963#endif // KMP_DEBUG
3964
Jim Cownie5e8470a2013-09-27 10:38:44 +00003965/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003966 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003967 * initialization of features as much as possible */
3968static void
3969__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3970{
3971 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3972 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003973 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003974 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003975 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003976 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003977 KMP_DEBUG_ASSERT( team->t.t_threads );
3978 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3979 KMP_DEBUG_ASSERT( master );
3980 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003981
3982 KMP_MB();
3983
3984 TCW_SYNC_PTR(this_thr->th.th_team, team);
3985
3986 this_thr->th.th_info.ds.ds_tid = tid;
3987 this_thr->th.th_set_nproc = 0;
3988#if OMP_40_ENABLED
3989 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00003990# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00003991 this_thr->th.th_new_place = this_thr->th.th_current_place;
3992# endif
3993#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003994 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003995
3996 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003997 this_thr->th.th_team_nproc = team->t.t_nproc;
3998 this_thr->th.th_team_master = master;
3999 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004000 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4001
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004002 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004003
4004 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4005 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4006
4007 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4008
4009 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4010 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4011 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004012
4013 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004014 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004015
4016 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004017
4018#ifdef BUILD_TV
4019 this_thr->th.th_local.tv_data = 0;
4020#endif
4021
4022 if ( ! this_thr->th.th_pri_common ) {
4023 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4024 if ( __kmp_storage_map ) {
4025 __kmp_print_storage_map_gtid(
4026 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4027 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4028 );
4029 }; // if
4030 this_thr->th.th_pri_head = NULL;
4031 }; // if
4032
4033 /* Initialize dynamic dispatch */
4034 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004035 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004036 /*
4037 * Use team max_nproc since this will never change for the team.
4038 */
4039 size_t disp_size = sizeof( dispatch_private_info_t ) *
Jonathan Peyton067325f2016-05-31 19:01:15 +00004040 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004041 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4042 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004043 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004044 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4045
4046 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004047#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00004048 dispatch->th_doacross_buf_idx = 0;
4049#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004050 if( ! dispatch->th_disp_buffer ) {
4051 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004052
4053 if ( __kmp_storage_map ) {
4054 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
Jonathan Peyton067325f2016-05-31 19:01:15 +00004055 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
Jim Cownie5e8470a2013-09-27 10:38:44 +00004056 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4057 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4058 gtid, team->t.t_id, gtid );
4059 }
4060 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004061 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004062 }
4063
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004064 dispatch->th_dispatch_pr_current = 0;
4065 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004066
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004067 dispatch->th_deo_fcn = 0; /* ORDERED */
4068 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004069 }
4070
4071 this_thr->th.th_next_pool = NULL;
4072
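    // Lazily allocate and zero the task-state memo stack (initially 4 entries); it is
    // used to remember th_task_state across nested hot-team levels.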
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004073 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004074 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004075 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4076 this_thr->th.th_task_state_top = 0;
4077 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004078 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4079 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004080 }
4081
Jim Cownie5e8470a2013-09-27 10:38:44 +00004082 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4083 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4084
4085 KMP_MB();
4086}
4087
4088
4089/* allocate a new thread for the requesting team. this is only called from within a
4090 * forkjoin critical section. we will first try to get an available thread from the
4091 * thread pool. if none is available, we will fork a new one, assuming we are able
4092 * to create one; this should be assured, as the caller should have checked this
4093 * first.
4094 */
4095kmp_info_t *
4096__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4097{
4098 kmp_team_t *serial_team;
4099 kmp_info_t *new_thr;
4100 int new_gtid;
4101
4102 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4103 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004104#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004105 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004106#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004107 KMP_MB();
4108
4109 /* first, try to get one from the thread pool */
4110 if ( __kmp_thread_pool ) {
4111
4112 new_thr = (kmp_info_t*)__kmp_thread_pool;
4113 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4114 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4115 __kmp_thread_pool_insert_pt = NULL;
4116 }
4117 TCW_4(new_thr->th.th_in_pool, FALSE);
4118 //
4119 // Don't touch th_active_in_pool or th_active.
4120 // The worker thread adjusts those flags as it sleeps/awakens.
4121 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122 __kmp_thread_pool_nth--;
4123
4124 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4125 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004126 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004127 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4128 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4129
4130 /* setup the thread structure */
4131 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4132 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4133
4134 TCW_4(__kmp_nth, __kmp_nth + 1);
4135
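    // Reset the recycled thread's task-state bookkeeping before it joins the new team.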
Jonathan Peyton54127982015-11-04 21:37:48 +00004136 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004137 new_thr->th.th_task_state_top = 0;
4138 new_thr->th.th_task_state_stack_sz = 4;
4139
Jim Cownie5e8470a2013-09-27 10:38:44 +00004140#ifdef KMP_ADJUST_BLOCKTIME
4141 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004142 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004143 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4144 if ( __kmp_nth > __kmp_avail_proc ) {
4145 __kmp_zero_bt = TRUE;
4146 }
4147 }
4148#endif /* KMP_ADJUST_BLOCKTIME */
4149
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004150#if KMP_DEBUG
4151 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
4152 int b;
4153 kmp_balign_t * balign = new_thr->th.th_bar;
4154 for( b = 0; b < bs_last_barrier; ++ b )
4155 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4156#endif
4157
Jim Cownie5e8470a2013-09-27 10:38:44 +00004158 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4159 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4160
4161 KMP_MB();
4162 return new_thr;
4163 }
4164
4165
4166 /* no, we'll fork a new one */
4167 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4168 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4169
4170 //
4171 // If this is the first worker thread the RTL is creating, then also
4172 // launch the monitor thread. We try to do this as early as possible.
4173 //
4174 if ( ! TCR_4( __kmp_init_monitor ) ) {
4175 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4176 if ( ! TCR_4( __kmp_init_monitor ) ) {
4177 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4178 TCW_4( __kmp_init_monitor, 1 );
4179 __kmp_create_monitor( & __kmp_monitor );
4180 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004181 #if KMP_OS_WINDOWS
4182 // AC: wait until monitor has started. This is a fix for CQ232808.
4183 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
4184 // work in between, then there is a high probability that the monitor thread starts after
4185 // the library shutdown. At shutdown it is too late to cope with the problem, because
4186 // when the master is in DllMain (process detach) the monitor has no chance to start
4187 // (it is blocked), and the master has no means to inform the monitor that the library has gone,
4188 // because all the memory which the monitor can access is going to be released/reset.
4189 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4190 KMP_YIELD( TRUE );
4191 }
4192 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4193 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004194 }
4195 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4196 }
4197
4198 KMP_MB();
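    /* find the lowest unused gtid slot; capacity was already checked above */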
4199 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4200 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4201 }
4202
4203 /* allocate space for it. */
4204 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4205
4206 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4207
4208 if ( __kmp_storage_map ) {
4209 __kmp_print_thread_storage_map( new_thr, new_gtid );
4210 }
4211
4212 /* add the reserve serialized team, initialized from the team's master thread */
4213 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004214 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004215 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004216
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004217 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004218 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004219#if OMPT_SUPPORT
4220 0, // root parallel id
4221#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004222#if OMP_40_ENABLED
4223 proc_bind_default,
4224#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004225 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004226 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004227 }
4228 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004229 serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
4230 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004231 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4232 new_thr ) );
4233
4234 /* setup the thread structures */
4235 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4236
4237 #if USE_FAST_MEMORY
4238 __kmp_initialize_fast_memory( new_thr );
4239 #endif /* USE_FAST_MEMORY */
4240
4241 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004242 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004243 __kmp_initialize_bget( new_thr );
4244 #endif
4245
4246 __kmp_init_random( new_thr ); // Initialize random number generator
4247
4248 /* Initialize these only once when thread is grabbed for a team allocation */
4249 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4250 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4251
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004252 int b;
4253 kmp_balign_t * balign = new_thr->th.th_bar;
4254 for(b=0; b<bs_last_barrier; ++b) {
4255 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4256 balign[b].bb.team = NULL;
4257 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4258 balign[b].bb.use_oncore_barrier = 0;
4259 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004260
4261 new_thr->th.th_spin_here = FALSE;
4262 new_thr->th.th_next_waiting = 0;
4263
Alp Toker98758b02014-03-02 04:12:06 +00004264#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004265 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4266 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4267 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4268 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4269#endif
4270
4271 TCW_4(new_thr->th.th_in_pool, FALSE);
4272 new_thr->th.th_active_in_pool = FALSE;
4273 TCW_4(new_thr->th.th_active, TRUE);
4274
4275 /* adjust the global counters */
4276 __kmp_all_nth ++;
4277 __kmp_nth ++;
4278
4279 //
4280 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4281 // for low numbers of procs, and method #2 (keyed API call) for higher
4282 // numbers of procs.
4283 //
4284 if ( __kmp_adjust_gtid_mode ) {
4285 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4286 if ( TCR_4(__kmp_gtid_mode) != 2) {
4287 TCW_4(__kmp_gtid_mode, 2);
4288 }
4289 }
4290 else {
4291 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4292 TCW_4(__kmp_gtid_mode, 1);
4293 }
4294 }
4295 }
4296
4297#ifdef KMP_ADJUST_BLOCKTIME
4298 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004299 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004300 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4301 if ( __kmp_nth > __kmp_avail_proc ) {
4302 __kmp_zero_bt = TRUE;
4303 }
4304 }
4305#endif /* KMP_ADJUST_BLOCKTIME */
4306
4307 /* actually fork it and create the new worker thread */
4308 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4309 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4310 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4311
Jim Cownie5e8470a2013-09-27 10:38:44 +00004312 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4313 KMP_MB();
4314 return new_thr;
4315}
4316
4317/*
4318 * reinitialize team for reuse.
4319 *
4320 * The hot team code calls this routine at every fork barrier, so the EPCC barrier
4321 * tests are extremely sensitive to changes in it, esp. writes to the team
4322 * struct, which cause a cache invalidation in all threads.
4323 *
4324 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4325 */
4326static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004327__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004328 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4329 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004330 KMP_DEBUG_ASSERT( team && new_icvs);
4331 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004332 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004333
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004334 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004335
Jim Cownie181b4bb2013-12-23 17:28:57 +00004336 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004337 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004338 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004339
4340 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4341 team->t.t_threads[0], team ) );
4342}
4343
Jim Cownie5e8470a2013-09-27 10:38:44 +00004344
4345/* initialize the team data structure
4346 * this assumes the t_threads and t_max_nproc are already set
4347 * also, we don't touch the arguments */
4348static void
4349__kmp_initialize_team(
4350 kmp_team_t * team,
4351 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004352 kmp_internal_control_t * new_icvs,
4353 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004354) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004355 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4356
Jim Cownie5e8470a2013-09-27 10:38:44 +00004357 /* verify */
4358 KMP_DEBUG_ASSERT( team );
4359 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4360 KMP_DEBUG_ASSERT( team->t.t_threads );
4361 KMP_MB();
4362
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004363 team->t.t_master_tid = 0; /* not needed */
4364 /* team->t.t_master_bar; not needed */
4365 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4366 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004367
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004368 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4369 team->t.t_next_pool = NULL;
4370 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004371
4372 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004373 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004374
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004375 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4376 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004377
4378#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004379 team->t.t_fp_control_saved = FALSE; /* not needed */
4380 team->t.t_x87_fpu_control_word = 0; /* not needed */
4381 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004382#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4383
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004384 team->t.t_construct = 0;
4385 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004386
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004387 team->t.t_ordered .dt.t_value = 0;
4388 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004389
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004390 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004391
4392#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004393 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004394#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004395 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004396
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004397 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004398
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004399 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004400
4401 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004402 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004403}
4404
Alp Toker98758b02014-03-02 04:12:06 +00004405#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004406/* Sets full mask for thread and returns old mask, no changes to structures. */
4407static void
4408__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4409{
4410 if ( KMP_AFFINITY_CAPABLE() ) {
4411 int status;
4412 if ( old_mask != NULL ) {
4413 status = __kmp_get_system_affinity( old_mask, TRUE );
4414 int error = errno;
4415 if ( status != 0 ) {
4416 __kmp_msg(
4417 kmp_ms_fatal,
4418 KMP_MSG( ChangeThreadAffMaskError ),
4419 KMP_ERR( error ),
4420 __kmp_msg_null
4421 );
4422 }
4423 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004424 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004425 }
4426}
4427#endif
4428
Alp Toker98758b02014-03-02 04:12:06 +00004429#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004430
4431//
4432// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4433// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004434// thread's partition, and binds each worker to a thread in its partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004435// The master thread's partition should already include its current binding.
4436//
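// For example (illustrative, assuming the master is bound to place 0): with
// proc_bind_spread, 4 threads and 8 places 0..7, the team is split into the
// sub-partitions [0,1], [2,3], [4,5] and [6,7], and each worker is bound to the
// first place of its sub-partition.
//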
4437static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004438__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004439{
4440 //
4441 // Copy the master thread's place partition to the team struct
4442 //
4443 kmp_info_t *master_th = team->t.t_threads[0];
4444 KMP_DEBUG_ASSERT( master_th != NULL );
4445 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4446 int first_place = master_th->th.th_first_place;
4447 int last_place = master_th->th.th_last_place;
4448 int masters_place = master_th->th.th_current_place;
4449 team->t.t_first_place = first_place;
4450 team->t.t_last_place = last_place;
4451
4452 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4453 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4454 masters_place, first_place, last_place ) );
4455
4456 switch ( proc_bind ) {
4457
4458 case proc_bind_default:
4459 //
4460 // serial teams might have the proc_bind policy set to
4461 // proc_bind_default. It doesn't matter, as we don't
4462 // rebind the master thread for any proc_bind policy.
4463 //
4464 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4465 break;
4466
4467 case proc_bind_master:
4468 {
4469 int f;
4470 int n_th = team->t.t_nproc;
4471 for ( f = 1; f < n_th; f++ ) {
4472 kmp_info_t *th = team->t.t_threads[f];
4473 KMP_DEBUG_ASSERT( th != NULL );
4474 th->th.th_first_place = first_place;
4475 th->th.th_last_place = last_place;
4476 th->th.th_new_place = masters_place;
4477
4478 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4479 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4480 team->t.t_id, f, masters_place, first_place, last_place ) );
4481 }
4482 }
4483 break;
4484
4485 case proc_bind_close:
4486 {
4487 int f;
4488 int n_th = team->t.t_nproc;
4489 int n_places;
4490 if ( first_place <= last_place ) {
4491 n_places = last_place - first_place + 1;
4492 }
4493 else {
4494 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4495 }
4496 if ( n_th <= n_places ) {
4497 int place = masters_place;
4498 for ( f = 1; f < n_th; f++ ) {
4499 kmp_info_t *th = team->t.t_threads[f];
4500 KMP_DEBUG_ASSERT( th != NULL );
4501
4502 if ( place == last_place ) {
4503 place = first_place;
4504 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004505 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004506 place = 0;
4507 }
4508 else {
4509 place++;
4510 }
4511 th->th.th_first_place = first_place;
4512 th->th.th_last_place = last_place;
4513 th->th.th_new_place = place;
4514
4515 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4516 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4517 team->t.t_id, f, place, first_place, last_place ) );
4518 }
4519 }
4520 else {
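            // more threads than places: pack S = n_th/n_places threads into each place,
            // giving one extra thread to every gap-th place until the rem extras are used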
4521 int S, rem, gap, s_count;
4522 S = n_th / n_places;
4523 s_count = 0;
4524 rem = n_th - ( S * n_places );
4525 gap = rem > 0 ? n_places/rem : n_places;
4526 int place = masters_place;
4527 int gap_ct = gap;
4528 for ( f = 0; f < n_th; f++ ) {
4529 kmp_info_t *th = team->t.t_threads[f];
4530 KMP_DEBUG_ASSERT( th != NULL );
4531
4532 th->th.th_first_place = first_place;
4533 th->th.th_last_place = last_place;
4534 th->th.th_new_place = place;
4535 s_count++;
4536
4537 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4538 // do nothing, add an extra thread to place on next iteration
4539 }
4540 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4541 // we added an extra thread to this place; move to next place
4542 if ( place == last_place ) {
4543 place = first_place;
4544 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004545 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004546 place = 0;
4547 }
4548 else {
4549 place++;
4550 }
4551 s_count = 0;
4552 gap_ct = 1;
4553 rem--;
4554 }
4555 else if (s_count == S) { // place full; don't add extra
4556 if ( place == last_place ) {
4557 place = first_place;
4558 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004559 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004560 place = 0;
4561 }
4562 else {
4563 place++;
4564 }
4565 gap_ct++;
4566 s_count = 0;
4567 }
4568
4569 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4570 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4571 team->t.t_id, f, th->th.th_new_place, first_place,
4572 last_place ) );
4573 }
4574 KMP_DEBUG_ASSERT( place == masters_place );
4575 }
4576 }
4577 break;
4578
4579 case proc_bind_spread:
4580 {
4581 int f;
4582 int n_th = team->t.t_nproc;
4583 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004584 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004585 if ( first_place <= last_place ) {
4586 n_places = last_place - first_place + 1;
4587 }
4588 else {
4589 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4590 }
4591 if ( n_th <= n_places ) {
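            // fewer threads than places: give each thread its own sub-partition of
            // S = n_places/n_th consecutive places; rem threads receive one extra place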
4592 int place = masters_place;
4593 int S = n_places/n_th;
4594 int s_count, rem, gap, gap_ct;
4595 rem = n_places - n_th*S;
4596 gap = rem ? n_th/rem : 1;
4597 gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004598 thidx = n_th;
4599 if (update_master_only == 1)
4600 thidx = 1;
4601 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004602 kmp_info_t *th = team->t.t_threads[f];
4603 KMP_DEBUG_ASSERT( th != NULL );
4604
4605 th->th.th_first_place = place;
4606 th->th.th_new_place = place;
4607 s_count = 1;
4608 while (s_count < S) {
4609 if ( place == last_place ) {
4610 place = first_place;
4611 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004612 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004613 place = 0;
4614 }
4615 else {
4616 place++;
4617 }
4618 s_count++;
4619 }
4620 if (rem && (gap_ct == gap)) {
4621 if ( place == last_place ) {
4622 place = first_place;
4623 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004624 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004625 place = 0;
4626 }
4627 else {
4628 place++;
4629 }
4630 rem--;
4631 gap_ct = 0;
4632 }
4633 th->th.th_last_place = place;
4634 gap_ct++;
4635
4636 if ( place == last_place ) {
4637 place = first_place;
4638 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004639 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004640 place = 0;
4641 }
4642 else {
4643 place++;
4644 }
4645
4646 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4647 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4648 team->t.t_id, f, th->th.th_new_place,
4649 th->th.th_first_place, th->th.th_last_place ) );
4650 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004651 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004652 }
4653 else {
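            // more threads than places: bind S (or S+1) threads to a single place each,
            // spreading the rem extra threads every gap places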
4654 int S, rem, gap, s_count;
4655 S = n_th / n_places;
4656 s_count = 0;
4657 rem = n_th - ( S * n_places );
4658 gap = rem > 0 ? n_places/rem : n_places;
4659 int place = masters_place;
4660 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004661 thidx = n_th;
4662 if (update_master_only == 1)
4663 thidx = 1;
4664 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004665 kmp_info_t *th = team->t.t_threads[f];
4666 KMP_DEBUG_ASSERT( th != NULL );
4667
4668 th->th.th_first_place = place;
4669 th->th.th_last_place = place;
4670 th->th.th_new_place = place;
4671 s_count++;
4672
4673 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4674 // do nothing, add an extra thread to place on next iteration
4675 }
4676 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4677 // we added an extra thread to this place; move on to next place
4678 if ( place == last_place ) {
4679 place = first_place;
4680 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004681 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004682 place = 0;
4683 }
4684 else {
4685 place++;
4686 }
4687 s_count = 0;
4688 gap_ct = 1;
4689 rem--;
4690 }
4691 else if (s_count == S) { // place is full; don't add extra thread
4692 if ( place == last_place ) {
4693 place = first_place;
4694 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004695 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004696 place = 0;
4697 }
4698 else {
4699 place++;
4700 }
4701 gap_ct++;
4702 s_count = 0;
4703 }
4704
4705 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4706 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4707 team->t.t_id, f, th->th.th_new_place,
4708 th->th.th_first_place, th->th.th_last_place) );
4709 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004710 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004711 }
4712 }
4713 break;
4714
4715 default:
4716 break;
4717 }
4718
4719 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4720}
4721
Alp Toker98758b02014-03-02 04:12:06 +00004722#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004723
4724/* allocate a new team data structure to use. take one off of the free pool if available */
4725kmp_team_t *
4726__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004727#if OMPT_SUPPORT
4728 ompt_parallel_id_t ompt_parallel_id,
4729#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004730#if OMP_40_ENABLED
4731 kmp_proc_bind_t new_proc_bind,
4732#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004733 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004734 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004735{
Jonathan Peyton45be4502015-08-11 21:36:41 +00004736 KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004737 int f;
4738 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004739 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004740 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004741
4742 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4743 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4744 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4745 KMP_MB();
4746
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004747#if KMP_NESTED_HOT_TEAMS
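    // Determine the nesting level of this fork and whether a hot team has already
    // been allocated for that level.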
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004748 kmp_hot_team_ptr_t *hot_teams;
4749 if( master ) {
4750 team = master->th.th_team;
4751 level = team->t.t_active_level;
4752 if( master->th.th_teams_microtask ) { // in teams construct?
4753 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4754 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4755 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
4756 ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise
4757 }
4758 }
4759 hot_teams = master->th.th_hot_teams;
4760 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4761 { // hot team has already been allocated for given level
4762 use_hot_team = 1;
4763 } else {
4764 use_hot_team = 0;
4765 }
4766 }
4767#endif
4768 // Optimization to use a "hot" team
4769 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004770 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004771#if KMP_NESTED_HOT_TEAMS
4772 team = hot_teams[level].hot_team;
4773#else
4774 team = root->r.r_hot_team;
4775#endif
4776#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004777 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004778 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4779 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004780 }
4781#endif
4782
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004783 // Has the number of threads changed?
4784 /* Let's assume the most common case is that the number of threads is unchanged, and
4785 put that case first. */
4786 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4787 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004788 // This case can mean that omp_set_num_threads() was called and the hot team size
4789 // was already reduced, so we check the special flag
4790 if ( team->t.t_size_changed == -1 ) {
4791 team->t.t_size_changed = 1;
4792 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004793 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004794 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004795
4796 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004797 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004798 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4799 team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004800 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004801
4802 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4803
4804 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4805 0, team->t.t_threads[0], team ) );
4806 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4807
4808#if OMP_40_ENABLED
4809# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004810 if ( ( team->t.t_size_changed == 0 )
4811 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004812 if (new_proc_bind == proc_bind_spread) {
4813 __kmp_partition_places(team, 1); // add flag to update only master for spread
4814 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004815 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4816 team->t.t_id, new_proc_bind, team->t.t_first_place,
4817 team->t.t_last_place ) );
4818 }
4819 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004820 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004821 __kmp_partition_places( team );
4822 }
4823# else
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004824 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004825# endif /* KMP_AFFINITY_SUPPORTED */
4826#endif /* OMP_40_ENABLED */
4827 }
4828 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004829 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4830
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004831 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004832#if KMP_NESTED_HOT_TEAMS
4833 if( __kmp_hot_teams_mode == 0 ) {
4834 // AC: saved number of threads should correspond to team's value in this mode,
4835 // can be bigger in mode 1, when hot team has some threads in reserve
4836 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4837 hot_teams[level].hot_team_nth = new_nproc;
4838#endif // KMP_NESTED_HOT_TEAMS
4839 /* release the extra threads we don't need any more */
4840 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4841 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004842 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4843 // When decreasing team size, threads no longer in the team should unref task team.
4844 team->t.t_threads[f]->th.th_task_team = NULL;
4845 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004846 __kmp_free_thread( team->t.t_threads[ f ] );
4847 team->t.t_threads[ f ] = NULL;
4848 }
4849#if KMP_NESTED_HOT_TEAMS
4850 } // (__kmp_hot_teams_mode == 0)
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004851 else {
4852 // When keeping extra threads in team, switch threads to wait on own b_go flag
4853 for (f=new_nproc; f<team->t.t_nproc; ++f) {
4854 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4855 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4856 for (int b=0; b<bs_last_barrier; ++b) {
4857 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4858 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4859 }
4860 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4861 }
4862 }
4863 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004864#endif // KMP_NESTED_HOT_TEAMS
4865 team->t.t_nproc = new_nproc;
4866 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004867 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4868 team->t.t_sched.chunk != new_icvs->sched.chunk)
4869 team->t.t_sched = new_icvs->sched;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004870 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004871
Jim Cownie5e8470a2013-09-27 10:38:44 +00004872 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004873 for(f = 0; f < new_nproc; ++f) {
4874 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004875 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004876 // restore the current task state of the master thread: should be the implicit task
4877 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4878 0, team->t.t_threads[0], team ) );
4879
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004880 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004881
4882#ifdef KMP_DEBUG
4883 for ( f = 0; f < team->t.t_nproc; f++ ) {
4884 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4885 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4886 }
4887#endif
4888
4889#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004890 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00004891# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004892 __kmp_partition_places( team );
4893# endif
4894#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004895 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004896 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004897#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004898 kmp_affin_mask_t *old_mask;
4899 if ( KMP_AFFINITY_CAPABLE() ) {
4900 KMP_CPU_ALLOC(old_mask);
4901 }
4902#endif
4903
4904 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4905
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004906 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004907
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004908#if KMP_NESTED_HOT_TEAMS
4909 int avail_threads = hot_teams[level].hot_team_nth;
4910 if( new_nproc < avail_threads )
4911 avail_threads = new_nproc;
4912 kmp_info_t **other_threads = team->t.t_threads;
4913 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4914 // Adjust barrier data of reserved threads (if any) of the team
4915 // Other data will be set in __kmp_initialize_info() below.
4916 int b;
4917 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4918 for ( b = 0; b < bs_last_barrier; ++ b ) {
4919 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4920 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004921#if USE_DEBUGGER
4922 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4923#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004924 }
4925 }
4926 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4927 // we have all needed threads in reserve, no need to allocate any
4928 // this is only possible in mode 1; we cannot have reserved threads in mode 0
4929 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4930 team->t.t_nproc = new_nproc; // just get reserved threads involved
4931 } else {
4932 // we may have some threads in reserve, but not enough
4933 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4934 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4935#endif // KMP_NESTED_HOT_TEAMS
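            // grow the team's arrays if the hot team has never been this large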
4936 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004937 /* reallocate larger arrays */
4938 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004939 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004940 }
4941
Alp Toker98758b02014-03-02 04:12:06 +00004942#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004943 /* Temporarily set full mask for master thread before
4944 creation of workers. The reason is that workers inherit
4945 the affinity from the master, so if a lot of workers are
4946 created on a single core quickly, they don't get
4947 a chance to set their own affinity for a long time.
4948 */
4949 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4950#endif
4951
4952 /* allocate new threads for the hot team */
4953 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4954 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4955 KMP_DEBUG_ASSERT( new_worker );
4956 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004957
Jonathan Peytond26e2132015-09-10 18:44:30 +00004958 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004959 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4960 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4961 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4962
4963 { // Initialize barrier data for new threads.
4964 int b;
4965 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004966 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004967 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004968 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004969#if USE_DEBUGGER
4970 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4971#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004972 }
4973 }
4974 }
4975
Alp Toker98758b02014-03-02 04:12:06 +00004976#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004977 if ( KMP_AFFINITY_CAPABLE() ) {
4978 /* Restore initial master thread's affinity mask */
4979 __kmp_set_system_affinity( old_mask, TRUE );
4980 KMP_CPU_FREE(old_mask);
4981 }
4982#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004983#if KMP_NESTED_HOT_TEAMS
4984 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
4985#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004986 /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00004987 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004988 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004989
Jonathan Peytone03b62f2015-10-08 18:49:40 +00004990 /* reinitialize the threads */
4991 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00004992 for (f=0; f < team->t.t_nproc; ++f)
4993 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
4994 if (level) { // set th_task_state for new threads in nested hot team
4995 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00004996 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00004997 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
4998 for (f=old_nproc; f < team->t.t_nproc; ++f)
4999 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005000 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005001 else { // set th_task_state for new threads in non-nested hot team
5002 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
5003 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005004 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005005 }
5006
Jim Cownie5e8470a2013-09-27 10:38:44 +00005007#ifdef KMP_DEBUG
5008 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5009 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5010 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5011 }
5012#endif
5013
5014#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00005015 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00005016# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005017 __kmp_partition_places( team );
5018# endif
5019#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005020 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005021
5022#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005023 kmp_info_t *master = team->t.t_threads[0];
5024 if( master->th.th_teams_microtask ) {
5025 for( f = 1; f < new_nproc; ++f ) {
5026 // propagate teams construct specific info to workers
5027 kmp_info_t *thr = team->t.t_threads[f];
5028 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5029 thr->th.th_teams_level = master->th.th_teams_level;
5030 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005031 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005032 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005033#endif /* OMP_40_ENABLED */
5034#if KMP_NESTED_HOT_TEAMS
5035 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005036 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005037 for( f = 1; f < new_nproc; ++f ) {
5038 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005039 int b;
5040 kmp_balign_t * balign = thr->th.th_bar;
5041 for( b = 0; b < bs_last_barrier; ++ b ) {
5042 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5043 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005044#if USE_DEBUGGER
5045 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5046#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005047 }
5048 }
5049 }
5050#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005051
5052 /* reallocate space for arguments if necessary */
5053 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005054 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005055 //
5056 // The hot team re-uses the previous task team,
5057 // if untouched during the previous release->gather phase.
5058 //
5059
5060 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5061
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005062#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005063 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005064 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5065 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005066 }
5067#endif
5068
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005069#if OMPT_SUPPORT
5070 __ompt_team_assign_id(team, ompt_parallel_id);
5071#endif
5072
Jim Cownie5e8470a2013-09-27 10:38:44 +00005073 KMP_MB();
5074
5075 return team;
5076 }
5077
5078 /* next, let's try to take one from the team pool */
5079 KMP_MB();
5080 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5081 {
5082 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5083 if ( team->t.t_max_nproc >= max_nproc ) {
5084 /* take this team from the team pool */
5085 __kmp_team_pool = team->t.t_next_pool;
5086
5087 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005088 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005089
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005090 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5091 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5092 team->t.t_task_team[0] = NULL;
5093 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005094
5095 /* reallocate space for arguments if necessary */
5096 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005097 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005098
5099 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5100 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5101 { // Initialize barrier data.
5102 int b;
5103 for ( b = 0; b < bs_last_barrier; ++ b) {
5104 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005105#if USE_DEBUGGER
5106 team->t.t_bar[ b ].b_master_arrived = 0;
5107 team->t.t_bar[ b ].b_team_arrived = 0;
5108#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005109 }
5110 }
5111
5112#if OMP_40_ENABLED
5113 team->t.t_proc_bind = new_proc_bind;
5114#endif
5115
5116 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005117
5118#if OMPT_SUPPORT
5119 __ompt_team_assign_id(team, ompt_parallel_id);
5120#endif
5121
Jim Cownie5e8470a2013-09-27 10:38:44 +00005122 KMP_MB();
5123
5124 return team;
5125 }
5126
5127 /* reap team if it is too small, then loop back and check the next one */
    5128        /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
    5129        /* TODO: use a technique to find the right-sized hot team instead of reaping them */
5130 team = __kmp_reap_team( team );
5131 __kmp_team_pool = team;
5132 }
5133
5134 /* nothing available in the pool, no matter, make a new team! */
5135 KMP_MB();
5136 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5137
5138 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005139 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005140 /* NOTE well, for some reason allocating one big buffer and dividing it
    5141     * up seems to really hurt performance a lot on the P4, so let's not use
5142 * this... */
5143 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005144
5145 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005146 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005147
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005148 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5149 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5150 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5151 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005152
5153 if ( __kmp_storage_map ) {
5154 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5155 }
5156
5157 /* allocate space for arguments */
5158 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005159 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005160
5161 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5162 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5163 { // Initialize barrier data.
5164 int b;
5165 for ( b = 0; b < bs_last_barrier; ++ b ) {
5166 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005167#if USE_DEBUGGER
5168 team->t.t_bar[ b ].b_master_arrived = 0;
5169 team->t.t_bar[ b ].b_team_arrived = 0;
5170#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005171 }
5172 }
5173
5174#if OMP_40_ENABLED
5175 team->t.t_proc_bind = new_proc_bind;
5176#endif
5177
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005178#if OMPT_SUPPORT
5179 __ompt_team_assign_id(team, ompt_parallel_id);
5180 team->t.ompt_serialized_team_info = NULL;
5181#endif
5182
Jim Cownie5e8470a2013-09-27 10:38:44 +00005183 KMP_MB();
5184
5185 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5186
5187 return team;
5188}
5189
5190/* TODO implement hot-teams at all levels */
5191/* TODO implement lazy thread release on demand (disband request) */
5192
5193/* free the team. return it to the team pool. release all the threads
5194 * associated with it */
5195void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005196__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005197{
5198 int f;
5199 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5200
5201 /* verify state */
5202 KMP_DEBUG_ASSERT( root );
5203 KMP_DEBUG_ASSERT( team );
5204 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5205 KMP_DEBUG_ASSERT( team->t.t_threads );
5206
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005207 int use_hot_team = team == root->r.r_hot_team;
5208#if KMP_NESTED_HOT_TEAMS
5209 int level;
5210 kmp_hot_team_ptr_t *hot_teams;
5211 if( master ) {
5212 level = team->t.t_active_level - 1;
5213 if( master->th.th_teams_microtask ) { // in teams construct?
5214 if( master->th.th_teams_size.nteams > 1 ) {
5215 ++level; // level was not increased in teams construct for team_of_masters
5216 }
5217 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5218 master->th.th_teams_level == team->t.t_level ) {
5219 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5220 } // team->t.t_level will be increased inside parallel
5221 }
5222 hot_teams = master->th.th_hot_teams;
5223 if( level < __kmp_hot_teams_max_level ) {
5224 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5225 use_hot_team = 1;
5226 }
5227 }
5228#endif // KMP_NESTED_HOT_TEAMS
5229
Jim Cownie5e8470a2013-09-27 10:38:44 +00005230 /* team is done working */
5231 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005232 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005233 // Do not reset pointer to parent team to NULL for hot teams.
5234
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005235 /* if we are non-hot team, release our threads */
5236 if( ! use_hot_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005237 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005238 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005239 int tt_idx;
5240 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005241 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5242 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005243 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5244 team->t.t_threads[f]->th.th_task_team = NULL;
5245 }
5246 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005247#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005248 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005249#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005250 team->t.t_task_team[tt_idx] = NULL;
5251 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005252 }
5253 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005254
5255 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005256 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005257 team->t.t_level = 0;
5258 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005259
Jim Cownie5e8470a2013-09-27 10:38:44 +00005260 /* free the worker threads */
5261 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5262 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5263 __kmp_free_thread( team->t.t_threads[ f ] );
5264 team->t.t_threads[ f ] = NULL;
5265 }
5266
Jim Cownie5e8470a2013-09-27 10:38:44 +00005267 /* put the team back in the team pool */
5268 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005269 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005270 __kmp_team_pool = (volatile kmp_team_t*) team;
5271 }
5272
5273 KMP_MB();
5274}
5275
5276
5277/* reap the team. destroy it, reclaim all its resources and free its memory */
5278kmp_team_t *
5279__kmp_reap_team( kmp_team_t *team )
5280{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005281 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005282
5283 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005284 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5285 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5286 KMP_DEBUG_ASSERT( team->t.t_threads );
5287 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005288
5289 /* TODO clean the threads that are a part of this? */
5290
5291 /* free stuff */
5292
5293 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005294 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5295 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005296 __kmp_free( team );
5297
5298 KMP_MB();
5299 return next_pool;
5300}
5301
5302//
5303// Free the thread. Don't reap it, just place it on the pool of available
5304// threads.
5305//
5306// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5307// binding for the affinity mechanism to be useful.
5308//
5309// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5310// However, we want to avoid a potential performance problem by always
5311// scanning through the list to find the correct point at which to insert
5312// the thread (potential N**2 behavior). To do this we keep track of the
5313// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5314// With single-level parallelism, threads will always be added to the tail
5315// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5316// parallelism, all bets are off and we may need to scan through the entire
5317// free list.
5318//
5319// This change also has a potentially large performance benefit, for some
5320// applications. Previously, as threads were freed from the hot team, they
5321// would be placed back on the free list in inverse order. If the hot team
    5322// grew back to its original size, then the freed threads would be placed
5323// back on the hot team in reverse order. This could cause bad cache
5324// locality problems on programs where the size of the hot team regularly
5325// grew and shrunk.
5326//
    5327// Now, for single-level parallelism, the OMP tid is always == gtid.
5328//
5329void
5330__kmp_free_thread( kmp_info_t *this_th )
5331{
5332 int gtid;
5333 kmp_info_t **scan;
5334
5335 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5336 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5337
5338 KMP_DEBUG_ASSERT( this_th );
5339
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005340    // When moving a thread to the pool, switch it to wait on its own b_go flag, and reset its barrier team pointers to NULL (uninitialized team).
5341 int b;
5342 kmp_balign_t *balign = this_th->th.th_bar;
5343 for (b=0; b<bs_last_barrier; ++b) {
5344 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5345 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5346 balign[b].bb.team = NULL;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005347 balign[b].bb.leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005348 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005349 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005350
Jim Cownie5e8470a2013-09-27 10:38:44 +00005351 /* put thread back on the free pool */
5352 TCW_PTR(this_th->th.th_team, NULL);
5353 TCW_PTR(this_th->th.th_root, NULL);
5354 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5355
5356 //
5357 // If the __kmp_thread_pool_insert_pt is already past the new insert
5358 // point, then we need to re-scan the entire list.
5359 //
5360 gtid = this_th->th.th_info.ds.ds_gtid;
5361 if ( __kmp_thread_pool_insert_pt != NULL ) {
5362 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5363 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5364 __kmp_thread_pool_insert_pt = NULL;
5365 }
5366 }
5367
5368 //
5369 // Scan down the list to find the place to insert the thread.
5370 // scan is the address of a link in the list, possibly the address of
5371 // __kmp_thread_pool itself.
5372 //
    5373    // In the absence of nested parallelism, the for loop will have 0 iterations.
5374 //
5375 if ( __kmp_thread_pool_insert_pt != NULL ) {
5376 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5377 }
5378 else {
5379 scan = (kmp_info_t **)&__kmp_thread_pool;
5380 }
5381 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5382 scan = &( (*scan)->th.th_next_pool ) );
5383
5384 //
5385 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5386 // to its address.
5387 //
5388 TCW_PTR(this_th->th.th_next_pool, *scan);
5389 __kmp_thread_pool_insert_pt = *scan = this_th;
5390 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5391 || ( this_th->th.th_info.ds.ds_gtid
5392 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5393 TCW_4(this_th->th.th_in_pool, TRUE);
5394 __kmp_thread_pool_nth++;
5395
5396 TCW_4(__kmp_nth, __kmp_nth - 1);
5397
5398#ifdef KMP_ADJUST_BLOCKTIME
5399 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005400 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005401 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5402 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5403 if ( __kmp_nth <= __kmp_avail_proc ) {
5404 __kmp_zero_bt = FALSE;
5405 }
5406 }
5407#endif /* KMP_ADJUST_BLOCKTIME */
5408
5409 KMP_MB();
5410}
5411
Jim Cownie5e8470a2013-09-27 10:38:44 +00005412
Jim Cownie5e8470a2013-09-27 10:38:44 +00005413/* ------------------------------------------------------------------------ */
5414
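/* Worker thread main loop (descriptive summary of the code below): sleep at
 * the fork barrier until a team is assigned, invoke that team's microtask,
 * synchronize at the join barrier, and repeat until library shutdown
 * (__kmp_global.g.g_done). */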
5415void *
5416__kmp_launch_thread( kmp_info_t *this_thr )
5417{
5418 int gtid = this_thr->th.th_info.ds.ds_gtid;
5419/* void *stack_data;*/
5420 kmp_team_t *(*volatile pteam);
5421
5422 KMP_MB();
5423 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5424
5425 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005426 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005427 }
5428
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005429#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005430 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005431 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5432 this_thr->th.ompt_thread_info.wait_id = 0;
5433 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005434 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005435 __ompt_thread_begin(ompt_thread_worker, gtid);
5436 }
5437 }
5438#endif
5439
Jim Cownie5e8470a2013-09-27 10:38:44 +00005440 /* This is the place where threads wait for work */
5441 while( ! TCR_4(__kmp_global.g.g_done) ) {
5442 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5443 KMP_MB();
5444
5445 /* wait for work to do */
5446 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5447
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005448#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005449 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005450 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5451 }
5452#endif
5453
Jim Cownie5e8470a2013-09-27 10:38:44 +00005454 /* No tid yet since not part of a team */
5455 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5456
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005457#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005458 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005459 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5460 }
5461#endif
5462
Jim Cownie5e8470a2013-09-27 10:38:44 +00005463 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5464
5465 /* have we been allocated? */
5466 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005467#if OMPT_SUPPORT
5468 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005469 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005470 if (ompt_enabled) {
5471 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005472 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005473 }
5474#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005475 /* we were just woken up, so run our new task */
5476 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5477 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005478 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5479 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005480
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005481 updateHWFPControl (*pteam);
5482
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005483#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005484 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005485 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005486 // Initialize OMPT task id for implicit task.
5487 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005488 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005489 }
5490#endif
5491
Jonathan Peyton45be4502015-08-11 21:36:41 +00005492 KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005493 {
Jonathan Peyton45be4502015-08-11 21:36:41 +00005494 KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005495 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5496 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005497 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005498 }
Jonathan Peyton45be4502015-08-11 21:36:41 +00005499 KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005500 KMP_ASSERT( rc );
5501
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005502#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005503 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005504 /* no frame set while outside task */
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005505 task_info->frame.exit_runtime_frame = 0;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005506
5507 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5508 }
5509#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005510 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005511 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5512 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005513 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005514 /* join barrier after parallel region */
5515 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005516#if OMPT_SUPPORT && OMPT_TRACE
5517 if (ompt_enabled) {
5518 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005519 // don't access *pteam here: it may have already been freed
5520 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005521 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5522 my_parallel_id, task_info->task_id);
5523 }
5524 task_info->frame.exit_runtime_frame = 0;
5525 task_info->task_id = 0;
5526 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005527#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005528 }
5529 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005530 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005531
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005532#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005533 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005534 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5535 __ompt_thread_end(ompt_thread_worker, gtid);
5536 }
5537#endif
5538
Jonathan Peyton54127982015-11-04 21:37:48 +00005539 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005540 /* run the destructors for the threadprivate data for this thread */
5541 __kmp_common_destroy_gtid( gtid );
5542
5543 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5544 KMP_MB();
5545 return this_thr;
5546}
5547
5548/* ------------------------------------------------------------------------ */
5549/* ------------------------------------------------------------------------ */
5550
Jim Cownie5e8470a2013-09-27 10:38:44 +00005551void
5552__kmp_internal_end_dest( void *specific_gtid )
5553{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005554 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005555 #pragma warning( push )
5556 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5557 #endif
5558 // Make sure no significant bits are lost
5559 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005560 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005561 #pragma warning( pop )
5562 #endif
5563
5564 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
    5565    /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
5566 * this is because 0 is reserved for the nothing-stored case */
5567
5568 /* josh: One reason for setting the gtid specific data even when it is being
5569 destroyed by pthread is to allow gtid lookup through thread specific data
5570 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5571 that gets executed in the call to __kmp_internal_end_thread, actually
5572 gets the gtid through the thread specific data. Setting it here seems
5573 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5574 to run smoothly.
5575 todo: get rid of this after we remove the dependence on
5576 __kmp_gtid_get_specific
5577 */
5578 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5579 __kmp_gtid_set_specific( gtid );
5580 #ifdef KMP_TDATA_GTID
5581 __kmp_gtid = gtid;
5582 #endif
5583 __kmp_internal_end_thread( gtid );
5584}
5585
Jonathan Peyton99016992015-05-26 17:32:53 +00005586#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005587
5588// 2009-09-08 (lev): It looks the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005589// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005590// option in makefile.mk works fine.
5591
5592__attribute__(( destructor ))
5593void
5594__kmp_internal_end_dtor( void )
5595{
5596 __kmp_internal_end_atexit();
5597}
5598
5599void
5600__kmp_internal_end_fini( void )
5601{
5602 __kmp_internal_end_atexit();
5603}
5604
5605#endif
5606
5607/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5608void
5609__kmp_internal_end_atexit( void )
5610{
5611 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5612 /* [Windows]
5613 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5614 stat code that depends on thread specific data for gtid fails because that data becomes
5615 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5616 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5617 stat code and use __kmp_internal_end_library to cleanly shutdown the library.
5618
5619// TODO: Can some of this comment about GVS be removed?
5620 I suspect that the offending stat code is executed when the calling thread tries to
5621 clean up a dead root thread's data structures, resulting in GVS code trying to close
5622 the GVS structures for that thread, but since the stat code uses
5623 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5624 cleaning up itself instead of another thread, it gets confused. This happens because
5625 allowing a thread to unregister and cleanup another thread is a recent modification for
5626 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5627 thread may end up trying to unregister another thread only if thread death does not
5628 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5629 specific data destructor function to detect thread death. For Windows dynamic, there
5630 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5631 workaround is applicable only for Windows static stat library.
5632 */
5633 __kmp_internal_end_library( -1 );
5634 #if KMP_OS_WINDOWS
5635 __kmp_close_console();
5636 #endif
5637}
5638
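/* Descriptive note: release the resources of a finished thread -- wake a
 * worker from the fork barrier if needed, join the underlying OS thread (for
 * workers), and free its per-thread bookkeeping (consistency stack, fast
 * memory, affinity mask, serial team, ...). Caller holds __kmp_forkjoin_lock. */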
5639static void
5640__kmp_reap_thread(
5641 kmp_info_t * thread,
5642 int is_root
5643) {
5644
Alp Toker8f2d3f02014-02-24 10:40:15 +00005645 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005646
5647 int gtid;
5648
5649 KMP_DEBUG_ASSERT( thread != NULL );
5650
5651 gtid = thread->th.th_info.ds.ds_gtid;
5652
5653 if ( ! is_root ) {
5654
5655 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5656 /* Assume the threads are at the fork barrier here */
5657 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5658 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005659 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5660 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005661 }; // if
5662
Jim Cownie5e8470a2013-09-27 10:38:44 +00005663 // Terminate OS thread.
5664 __kmp_reap_worker( thread );
5665
5666 //
5667 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005668 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005669 //
5670 // There is a small timing hole here - if the worker thread was
    5671    // just waking up after sleeping in the pool, had reset its
5672 // th_active_in_pool flag but not decremented the global counter
5673 // __kmp_thread_pool_active_nth yet, then the global counter
5674 // might not get updated.
5675 //
5676 // Currently, this can only happen as the library is unloaded,
5677 // so there are no harmful side effects.
5678 //
5679 if ( thread->th.th_active_in_pool ) {
5680 thread->th.th_active_in_pool = FALSE;
5681 KMP_TEST_THEN_DEC32(
5682 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5683 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5684 }
5685
5686 // Decrement # of [worker] threads in the pool.
5687 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5688 --__kmp_thread_pool_nth;
5689 }; // if
5690
5691 // Free the fast memory for tasking
5692 #if USE_FAST_MEMORY
5693 __kmp_free_fast_memory( thread );
5694 #endif /* USE_FAST_MEMORY */
5695
5696 __kmp_suspend_uninitialize_thread( thread );
5697
5698 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5699 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5700
5701 -- __kmp_all_nth;
5702 // __kmp_nth was decremented when thread is added to the pool.
5703
5704#ifdef KMP_ADJUST_BLOCKTIME
5705 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005706 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005707 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5708 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5709 if ( __kmp_nth <= __kmp_avail_proc ) {
5710 __kmp_zero_bt = FALSE;
5711 }
5712 }
5713#endif /* KMP_ADJUST_BLOCKTIME */
5714
5715 /* free the memory being used */
5716 if( __kmp_env_consistency_check ) {
5717 if ( thread->th.th_cons ) {
5718 __kmp_free_cons_stack( thread->th.th_cons );
5719 thread->th.th_cons = NULL;
5720 }; // if
5721 }
5722
5723 if ( thread->th.th_pri_common != NULL ) {
5724 __kmp_free( thread->th.th_pri_common );
5725 thread->th.th_pri_common = NULL;
5726 }; // if
5727
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005728 if (thread->th.th_task_state_memo_stack != NULL) {
5729 __kmp_free(thread->th.th_task_state_memo_stack);
5730 thread->th.th_task_state_memo_stack = NULL;
5731 }
5732
Jim Cownie5e8470a2013-09-27 10:38:44 +00005733 #if KMP_USE_BGET
5734 if ( thread->th.th_local.bget_data != NULL ) {
5735 __kmp_finalize_bget( thread );
5736 }; // if
5737 #endif
5738
Alp Toker98758b02014-03-02 04:12:06 +00005739#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005740 if ( thread->th.th_affin_mask != NULL ) {
5741 KMP_CPU_FREE( thread->th.th_affin_mask );
5742 thread->th.th_affin_mask = NULL;
5743 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005744#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005745
5746 __kmp_reap_team( thread->th.th_serial_team );
5747 thread->th.th_serial_team = NULL;
5748 __kmp_free( thread );
5749
5750 KMP_MB();
5751
5752} // __kmp_reap_thread
5753
5754static void
5755__kmp_internal_end(void)
5756{
5757 int i;
5758
5759 /* First, unregister the library */
5760 __kmp_unregister_library();
5761
5762 #if KMP_OS_WINDOWS
5763 /* In Win static library, we can't tell when a root actually dies, so we
5764 reclaim the data structures for any root threads that have died but not
5765 unregistered themselves, in order to shut down cleanly.
5766 In Win dynamic library we also can't tell when a thread dies.
5767 */
5768 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5769 #endif
5770
5771 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5772 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005773 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005774 break;
5775 KMP_MB(); /* Flush all pending memory write invalidates. */
5776 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5777
5778 if ( i < __kmp_threads_capacity ) {
5779 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5780 KMP_MB(); /* Flush all pending memory write invalidates. */
5781
5782 //
5783 // Need to check that monitor was initialized before reaping it.
5784 // If we are called form __kmp_atfork_child (which sets
5785 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5786 // contain valid data, but it is only valid in the parent process,
5787 // not the child.
5788 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005789 // New behavior (201008): instead of keying off of the flag
5790 // __kmp_init_parallel, the monitor thread creation is keyed off
5791 // of the new flag __kmp_init_monitor.
5792 //
5793 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5794 if ( TCR_4( __kmp_init_monitor ) ) {
5795 __kmp_reap_monitor( & __kmp_monitor );
5796 TCW_4( __kmp_init_monitor, 0 );
5797 }
5798 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5799 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
5800 } else {
5801 /* TODO move this to cleanup code */
5802 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005803 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005804 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5805 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005806// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005807 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005808 }
5809 }
5810 #endif
5811
5812 KMP_MB();
5813
5814 // Reap the worker threads.
5815 // This is valid for now, but be careful if threads are reaped sooner.
    5816        while ( __kmp_thread_pool != NULL ) {    // Loop through all the threads in the pool.
5817 // Get the next thread from the pool.
5818 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5819 __kmp_thread_pool = thread->th.th_next_pool;
5820 // Reap it.
5821 thread->th.th_next_pool = NULL;
5822 thread->th.th_in_pool = FALSE;
5823 __kmp_reap_thread( thread, 0 );
5824 }; // while
5825 __kmp_thread_pool_insert_pt = NULL;
5826
5827 // Reap teams.
5828 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
5829 // Get the next team from the pool.
5830 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5831 __kmp_team_pool = team->t.t_next_pool;
5832 // Reap it.
5833 team->t.t_next_pool = NULL;
5834 __kmp_reap_team( team );
5835 }; // while
5836
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005837 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005838
5839 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5840 // TBD: Add some checking...
5841 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5842 }
5843
5844 /* Make sure all threadprivate destructors get run by joining with all worker
5845 threads before resetting this flag */
5846 TCW_SYNC_4(__kmp_init_common, FALSE);
5847
5848 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5849 KMP_MB();
5850
5851 //
5852 // See note above: One of the possible fixes for CQ138434 / CQ140126
5853 //
5854 // FIXME: push both code fragments down and CSE them?
5855 // push them into __kmp_cleanup() ?
5856 //
5857 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5858 if ( TCR_4( __kmp_init_monitor ) ) {
5859 __kmp_reap_monitor( & __kmp_monitor );
5860 TCW_4( __kmp_init_monitor, 0 );
5861 }
5862 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5863 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
5864
5865 } /* else !__kmp_global.t_active */
5866 TCW_4(__kmp_init_gtid, FALSE);
5867 KMP_MB(); /* Flush all pending memory write invalidates. */
5868
Jim Cownie5e8470a2013-09-27 10:38:44 +00005869 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005870#if OMPT_SUPPORT
5871 ompt_fini();
5872#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005873}
5874
5875void
5876__kmp_internal_end_library( int gtid_req )
5877{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005878 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5879 /* this shouldn't be a race condition because __kmp_internal_end() is the
5880 * only place to clear __kmp_serial_init */
5881 /* we'll check this later too, after we get the lock */
    5882    // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5883 // because the next check will work in any case.
5884 if( __kmp_global.g.g_abort ) {
5885 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5886 /* TODO abort? */
5887 return;
5888 }
5889 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5890 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5891 return;
5892 }
5893
5894
5895 KMP_MB(); /* Flush all pending memory write invalidates. */
5896
5897 /* find out who we are and what we should do */
5898 {
5899 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5900 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5901 if( gtid == KMP_GTID_SHUTDOWN ) {
5902 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5903 return;
5904 } else if( gtid == KMP_GTID_MONITOR ) {
5905 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5906 return;
5907 } else if( gtid == KMP_GTID_DNE ) {
5908 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5909 /* we don't know who we are, but we may still shutdown the library */
5910 } else if( KMP_UBER_GTID( gtid )) {
5911 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005912 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005913 __kmp_global.g.g_abort = -1;
5914 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5915 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5916 return;
5917 } else {
5918 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5919 __kmp_unregister_root_current_thread( gtid );
5920 }
5921 } else {
5922 /* worker threads may call this function through the atexit handler, if they call exit() */
5923 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5924 TODO: do a thorough shutdown instead
5925 */
5926 #ifdef DUMP_DEBUG_ON_EXIT
5927 if ( __kmp_debug_buf )
5928 __kmp_dump_debug_buffer( );
5929 #endif
5930 return;
5931 }
5932 }
5933 /* synchronize the termination process */
5934 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5935
5936 /* have we already finished */
5937 if( __kmp_global.g.g_abort ) {
5938 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5939 /* TODO abort? */
5940 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5941 return;
5942 }
5943 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5944 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5945 return;
5946 }
5947
5948 /* We need this lock to enforce mutex between this reading of
5949 __kmp_threads_capacity and the writing by __kmp_register_root.
5950 Alternatively, we can use a counter of roots that is
5951 atomically updated by __kmp_get_global_thread_id_reg,
5952 __kmp_do_serial_initialize and __kmp_internal_end_*.
5953 */
5954 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5955
5956 /* now we can safely conduct the actual termination */
5957 __kmp_internal_end();
5958
5959 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5960 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5961
5962 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
5963
5964 #ifdef DUMP_DEBUG_ON_EXIT
5965 if ( __kmp_debug_buf )
5966 __kmp_dump_debug_buffer();
5967 #endif
5968
5969 #if KMP_OS_WINDOWS
5970 __kmp_close_console();
5971 #endif
5972
5973 __kmp_fini_allocator();
5974
5975} // __kmp_internal_end_library
5976
5977void
5978__kmp_internal_end_thread( int gtid_req )
5979{
5980 int i;
5981
5982 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5983 /* this shouldn't be a race condition because __kmp_internal_end() is the
5984 * only place to clear __kmp_serial_init */
5985 /* we'll check this later too, after we get the lock */
5986 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5987 // because the next check will work in any case.
5988 if( __kmp_global.g.g_abort ) {
5989 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
5990 /* TODO abort? */
5991 return;
5992 }
5993 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5994 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
5995 return;
5996 }
5997
5998 KMP_MB(); /* Flush all pending memory write invalidates. */
5999
6000 /* find out who we are and what we should do */
6001 {
6002 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6003 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6004 if( gtid == KMP_GTID_SHUTDOWN ) {
6005 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6006 return;
6007 } else if( gtid == KMP_GTID_MONITOR ) {
6008 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6009 return;
6010 } else if( gtid == KMP_GTID_DNE ) {
6011 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6012 return;
6013 /* we don't know who we are */
6014 } else if( KMP_UBER_GTID( gtid )) {
6015 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006016 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006017 __kmp_global.g.g_abort = -1;
6018 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6019 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6020 return;
6021 } else {
6022 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6023 __kmp_unregister_root_current_thread( gtid );
6024 }
6025 } else {
6026 /* just a worker thread, let's leave */
6027 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6028
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006029 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006030 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006031 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006032
6033 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6034 return;
6035 }
6036 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006037 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006038    // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber thread,
    6039    // because it is better to shut down later, in the library destructor.
    6040    // The reason for this change is a performance problem seen when a non-OpenMP thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006041    // in a loop forks and joins many OpenMP threads. We can save a lot of time by
    6042    // keeping worker threads alive until program shutdown.
6043 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6044 // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006045 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006046 return;
6047 #endif
6048 /* synchronize the termination process */
6049 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6050
6051 /* have we already finished */
6052 if( __kmp_global.g.g_abort ) {
6053 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6054 /* TODO abort? */
6055 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6056 return;
6057 }
6058 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6059 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6060 return;
6061 }
6062
6063 /* We need this lock to enforce mutex between this reading of
6064 __kmp_threads_capacity and the writing by __kmp_register_root.
6065 Alternatively, we can use a counter of roots that is
6066 atomically updated by __kmp_get_global_thread_id_reg,
6067 __kmp_do_serial_initialize and __kmp_internal_end_*.
6068 */
6069
6070 /* should we finish the run-time? are all siblings done? */
6071 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6072
6073 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6074 if ( KMP_UBER_GTID( i ) ) {
6075 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6076 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6077 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6078 return;
6079 };
6080 }
6081
6082 /* now we can safely conduct the actual termination */
6083
6084 __kmp_internal_end();
6085
6086 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6087 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6088
Jim Cownie77c2a632014-09-03 11:34:33 +00006089 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006090
6091 #ifdef DUMP_DEBUG_ON_EXIT
6092 if ( __kmp_debug_buf )
6093 __kmp_dump_debug_buffer();
6094 #endif
6095} // __kmp_internal_end_thread
6096
6097// -------------------------------------------------------------------------------------------------
6098// Library registration stuff.
6099
6100static long __kmp_registration_flag = 0;
6101 // Random value used to indicate library initialization.
6102static char * __kmp_registration_str = NULL;
6103 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
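// Descriptive note: the runtime "registers" itself by publishing the variable
// __KMP_REGISTERED_LIB_<pid> in the environment; a second copy of the library
// loaded into the same process detects the first one through that variable
// and, unless KMP_DUPLICATE_LIB_OK is set, aborts with a fatal error.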
6104
6105
6106static inline
6107char *
6108__kmp_reg_status_name() {
6109 /*
6110 On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
6111 If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
    6112        the name of the registered_lib_env env var cannot be found, because the name will contain a different pid.
6113 */
6114 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
    6115} // __kmp_reg_status_name
6116
6117
6118void
6119__kmp_register_library_startup(
6120 void
6121) {
6122
6123 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6124 int done = 0;
6125 union {
6126 double dtime;
6127 long ltime;
6128 } time;
6129 #if KMP_OS_WINDOWS
6130 __kmp_initialize_system_tick();
6131 #endif
6132 __kmp_read_system_time( & time.dtime );
6133 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
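    // The flag combines a fixed tag (0xCAFE0000) with the low 16 bits of the
    // current time, so each library instance gets a practically unique marker.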
6134 __kmp_registration_str =
6135 __kmp_str_format(
6136 "%p-%lx-%s",
6137 & __kmp_registration_flag,
6138 __kmp_registration_flag,
6139 KMP_LIBRARY_FILE
6140 );
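    // The resulting environment value looks like (illustrative values only):
    //     __KMP_REGISTERED_LIB_<pid> = "0x7f12a4c01230-cafe8a3c-libomp.so"
    // i.e. the address of the flag, the flag value, and the library file name.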
6141
6142 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6143
6144 while ( ! done ) {
6145
6146 char * value = NULL; // Actual value of the environment variable.
6147
    6148        // Set the environment variable, but do not overwrite it if it already exists.
6149 __kmp_env_set( name, __kmp_registration_str, 0 );
    6150        // Check that the variable was actually written.
6151 value = __kmp_env_get( name );
6152 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6153
6154 done = 1; // Ok, environment variable set successfully, exit the loop.
6155
6156 } else {
6157
6158 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
    6159            // Check whether it is alive or dead.
6160 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6161 char * tail = value;
6162 char * flag_addr_str = NULL;
6163 char * flag_val_str = NULL;
6164 char const * file_name = NULL;
6165 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6166 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6167 file_name = tail;
6168 if ( tail != NULL ) {
6169 long * flag_addr = 0;
6170 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006171 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6172 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006173 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6174 // First, check whether environment-encoded address is mapped into addr space.
6175 // If so, dereference it to see if it still has the right value.
6176
6177 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6178 neighbor = 1;
6179 } else {
6180 // If not, then we know the other copy of the library is no longer running.
6181 neighbor = 2;
6182 }; // if
6183 }; // if
6184 }; // if
6185 switch ( neighbor ) {
6186 case 0 : // Cannot parse environment variable -- neighbor status unknown.
    6187                    // Assume it is the incompatible format of a future version of the library.
6188 // Assume the other library is alive.
6189 // WARN( ... ); // TODO: Issue a warning.
6190 file_name = "unknown library";
    6191                    // Attention! Falling through to the next case. That's intentional.
6192 case 1 : { // Neighbor is alive.
6193 // Check it is allowed.
6194 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6195 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6196 // That's not allowed. Issue fatal error.
6197 __kmp_msg(
6198 kmp_ms_fatal,
6199 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6200 KMP_HNT( DuplicateLibrary ),
6201 __kmp_msg_null
6202 );
6203 }; // if
6204 KMP_INTERNAL_FREE( duplicate_ok );
6205 __kmp_duplicate_library_ok = 1;
6206 done = 1; // Exit the loop.
6207 } break;
6208 case 2 : { // Neighbor is dead.
6209 // Clear the variable and try to register library again.
6210 __kmp_env_unset( name );
6211 } break;
6212 default : {
6213 KMP_DEBUG_ASSERT( 0 );
6214 } break;
6215 }; // switch
6216
6217 }; // if
6218 KMP_INTERNAL_FREE( (void *) value );
6219
6220 }; // while
6221 KMP_INTERNAL_FREE( (void *) name );
6222
6223} // func __kmp_register_library_startup
6224
6225
6226void
6227__kmp_unregister_library( void ) {
6228
6229 char * name = __kmp_reg_status_name();
6230 char * value = __kmp_env_get( name );
6231
6232 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6233 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6234 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6235 // Ok, this is our variable. Delete it.
6236 __kmp_env_unset( name );
6237 }; // if
6238
6239 KMP_INTERNAL_FREE( __kmp_registration_str );
6240 KMP_INTERNAL_FREE( value );
6241 KMP_INTERNAL_FREE( name );
6242
6243 __kmp_registration_flag = 0;
6244 __kmp_registration_str = NULL;
6245
6246} // __kmp_unregister_library
6247
6248
6249// End of Library registration stuff.
6250// -------------------------------------------------------------------------------------------------
6251
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006252#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6253
6254static void __kmp_check_mic_type()
6255{
6256 kmp_cpuid_t cpuid_state = {0};
6257 kmp_cpuid_t * cs_p = &cpuid_state;
Jonathan Peyton7be075332015-06-22 15:53:50 +00006258 __kmp_x86_cpuid(1, 0, cs_p);
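    // CPUID leaf 1 returns family/model/stepping in EAX (bits 11:8 = family,
    // 7:4 = model, 19:16 = extended model); the masks below pick out those
    // fields. The 0xB10 and 0x50670 signatures are assumed to correspond to
    // the Knights Corner and Knights Landing coprocessors, respectively.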
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006259 // We don't support mic1 at the moment
6260 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6261 __kmp_mic_type = mic2;
6262 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6263 __kmp_mic_type = mic3;
6264 } else {
6265 __kmp_mic_type = non_mic;
6266 }
6267}
6268
6269#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
6270
Jim Cownie5e8470a2013-09-27 10:38:44 +00006271static void
6272__kmp_do_serial_initialize( void )
6273{
6274 int i, gtid;
6275 int size;
6276
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006277 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006278
6279 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6280 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6281 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6282 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6283 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6284
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006285#if OMPT_SUPPORT
6286 ompt_pre_init();
6287#endif
6288
Jim Cownie5e8470a2013-09-27 10:38:44 +00006289 __kmp_validate_locks();
6290
6291 /* Initialize internal memory allocator */
6292 __kmp_init_allocator();
6293
6294 /* Register the library startup via an environment variable
6295 and check to see whether another copy of the library is already
6296 registered. */
6297
6298 __kmp_register_library_startup( );
6299
6300 /* TODO reinitialization of library */
6301 if( TCR_4(__kmp_global.g.g_done) ) {
6302 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6303 }
6304
6305 __kmp_global.g.g_abort = 0;
6306 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6307
6308 /* initialize the locks */
6309#if KMP_USE_ADAPTIVE_LOCKS
6310#if KMP_DEBUG_ADAPTIVE_LOCKS
6311 __kmp_init_speculative_stats();
6312#endif
6313#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006314#if KMP_STATS_ENABLED
6315 __kmp_init_tas_lock( & __kmp_stats_lock );
6316#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006317 __kmp_init_lock( & __kmp_global_lock );
6318 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6319 __kmp_init_lock( & __kmp_debug_lock );
6320 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6321 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6322 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6323 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6324 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6325 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6326 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6327 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6328 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6329 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6330 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6331 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6332 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6333 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6334 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6335 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6336 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6337
6338 /* conduct initialization and initial setup of configuration */
6339
6340 __kmp_runtime_initialize();
6341
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006342#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6343 __kmp_check_mic_type();
6344#endif
6345
Jim Cownie5e8470a2013-09-27 10:38:44 +00006346 // Some global variable initialization moved here from kmp_env_initialize()
6347#ifdef KMP_DEBUG
6348 kmp_diag = 0;
6349#endif
6350 __kmp_abort_delay = 0;
6351
6352 // From __kmp_init_dflt_team_nth()
6353 /* assume the entire machine will be used */
6354 __kmp_dflt_team_nth_ub = __kmp_xproc;
6355 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6356 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6357 }
6358 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6359 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6360 }
6361 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006362
6363 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6364 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6365 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6366 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6367 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6368 __kmp_library = library_throughput;
6369 // From KMP_SCHEDULE initialization
6370 __kmp_static = kmp_sch_static_balanced;
 6371 // AC: do not use the analytical guided schedule here, because it is non-monotonic
6372 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006373 //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006374 // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method
6375 // control parts
6376 #if KMP_FAST_REDUCTION_BARRIER
6377 #define kmp_reduction_barrier_gather_bb ((int)1)
6378 #define kmp_reduction_barrier_release_bb ((int)1)
6379 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6380 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6381 #endif // KMP_FAST_REDUCTION_BARRIER
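    // The branch bits control the fan-out of the tree/hyper barriers: roughly, a value of b
    // gives each thread up to 2^b children per level, so larger values mean flatter,
    // wider barrier trees.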
6382 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6383 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6384 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6385 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6386 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6387 #if KMP_FAST_REDUCTION_BARRIER
6388 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6389 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6390 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6391 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6392 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6393 }
6394 #endif // KMP_FAST_REDUCTION_BARRIER
6395 }
6396 #if KMP_FAST_REDUCTION_BARRIER
6397 #undef kmp_reduction_barrier_release_pat
6398 #undef kmp_reduction_barrier_gather_pat
6399 #undef kmp_reduction_barrier_release_bb
6400 #undef kmp_reduction_barrier_gather_bb
6401 #endif // KMP_FAST_REDUCTION_BARRIER
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006402#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006403 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006404 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006405 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006406 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6407 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6408 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6409 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006410#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006411 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006412 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6413 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6414 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006415#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006416#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006417
6418 // From KMP_CHECKS initialization
6419#ifdef KMP_DEBUG
6420 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6421#else
6422 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6423#endif
6424
6425 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6426 __kmp_foreign_tp = TRUE;
6427
6428 __kmp_global.g.g_dynamic = FALSE;
6429 __kmp_global.g.g_dynamic_mode = dynamic_default;
6430
6431 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006432
Jim Cownie5e8470a2013-09-27 10:38:44 +00006433 // Print all messages in message catalog for testing purposes.
6434 #ifdef KMP_DEBUG
6435 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6436 if ( __kmp_str_match_true( val ) ) {
6437 kmp_str_buf_t buffer;
6438 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006439 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006440 __kmp_printf( "%s", buffer.str );
6441 __kmp_str_buf_free( & buffer );
6442 }; // if
6443 __kmp_env_free( & val );
6444 #endif
6445
Jim Cownie181b4bb2013-12-23 17:28:57 +00006446 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006447 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6448 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6449
Jim Cownie5e8470a2013-09-27 10:38:44 +00006450 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6451 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6452 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6453 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6454 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6455 __kmp_thread_pool = NULL;
6456 __kmp_thread_pool_insert_pt = NULL;
6457 __kmp_team_pool = NULL;
6458
6459 /* Allocate all of the variable sized records */
6460 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6461 /* Since allocation is cache-aligned, just add extra padding at the end */
6462 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6463 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6464 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
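    // __kmp_root is carved out of the same cache-aligned allocation, immediately after the
    // __kmp_threads array, which is why __kmp_cleanup() frees only __kmp_threads.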
6465
6466 /* init thread counts */
6467 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
6468 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
6469 __kmp_all_nth = 0;
6470 __kmp_nth = 0;
6471
6472 /* setup the uber master thread and hierarchy */
6473 gtid = __kmp_register_root( TRUE );
6474 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6475 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6476 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6477
6478 KMP_MB(); /* Flush all pending memory write invalidates. */
6479
6480 __kmp_common_initialize();
6481
6482 #if KMP_OS_UNIX
6483 /* invoke the child fork handler */
6484 __kmp_register_atfork();
6485 #endif
6486
Jonathan Peyton99016992015-05-26 17:32:53 +00006487 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006488 {
6489 /* Invoke the exit handler when the program finishes, only for static library.
6490 For dynamic library, we already have _fini and DllMain.
6491 */
6492 int rc = atexit( __kmp_internal_end_atexit );
6493 if ( rc != 0 ) {
6494 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6495 }; // if
6496 }
6497 #endif
6498
6499 #if KMP_HANDLE_SIGNALS
6500 #if KMP_OS_UNIX
6501 /* NOTE: make sure that this is called before the user installs
6502 * their own signal handlers so that the user handlers
 6503 * are called first. This way they can return false,
6504 * not call our handler, avoid terminating the library,
6505 * and continue execution where they left off. */
6506 __kmp_install_signals( FALSE );
6507 #endif /* KMP_OS_UNIX */
6508 #if KMP_OS_WINDOWS
6509 __kmp_install_signals( TRUE );
6510 #endif /* KMP_OS_WINDOWS */
6511 #endif
6512
6513 /* we have finished the serial initialization */
6514 __kmp_init_counter ++;
6515
6516 __kmp_init_serial = TRUE;
6517
6518 if (__kmp_settings) {
6519 __kmp_env_print();
6520 }
6521
6522#if OMP_40_ENABLED
6523 if (__kmp_display_env || __kmp_display_env_verbose) {
6524 __kmp_env_print_2();
6525 }
6526#endif // OMP_40_ENABLED
6527
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006528#if OMPT_SUPPORT
6529 ompt_post_init();
6530#endif
6531
Jim Cownie5e8470a2013-09-27 10:38:44 +00006532 KMP_MB();
6533
6534 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6535}
6536
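// Public entry point. Uses double-checked initialization: __kmp_init_serial is tested once
// without the lock and again under __kmp_initz_lock, so only one thread ever runs
// __kmp_do_serial_initialize().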
6537void
6538__kmp_serial_initialize( void )
6539{
6540 if ( __kmp_init_serial ) {
6541 return;
6542 }
6543 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6544 if ( __kmp_init_serial ) {
6545 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6546 return;
6547 }
6548 __kmp_do_serial_initialize();
6549 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6550}
6551
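// Middle initialization: bring up affinity (when supported) and derive the default team
// size (__kmp_dflt_team_nth) from the number of cores or available OS procs.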
6552static void
6553__kmp_do_middle_initialize( void )
6554{
6555 int i, j;
6556 int prev_dflt_team_nth;
6557
6558 if( !__kmp_init_serial ) {
6559 __kmp_do_serial_initialize();
6560 }
6561
6562 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6563
6564 //
6565 // Save the previous value for the __kmp_dflt_team_nth so that
6566 // we can avoid some reinitialization if it hasn't changed.
6567 //
6568 prev_dflt_team_nth = __kmp_dflt_team_nth;
6569
Alp Toker98758b02014-03-02 04:12:06 +00006570#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006571 //
6572 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6573 // number of cores on the machine.
6574 //
6575 __kmp_affinity_initialize();
6576
6577 //
6578 // Run through the __kmp_threads array and set the affinity mask
6579 // for each root thread that is currently registered with the RTL.
6580 //
6581 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6582 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6583 __kmp_affinity_set_init_mask( i, TRUE );
6584 }
6585 }
Alp Toker98758b02014-03-02 04:12:06 +00006586#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006587
6588 KMP_ASSERT( __kmp_xproc > 0 );
6589 if ( __kmp_avail_proc == 0 ) {
6590 __kmp_avail_proc = __kmp_xproc;
6591 }
6592
6593 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
6594 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006595 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006596 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6597 j++;
6598 }
6599
6600 if ( __kmp_dflt_team_nth == 0 ) {
6601#ifdef KMP_DFLT_NTH_CORES
6602 //
6603 // Default #threads = #cores
6604 //
6605 __kmp_dflt_team_nth = __kmp_ncores;
6606 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6607 __kmp_dflt_team_nth ) );
6608#else
6609 //
6610 // Default #threads = #available OS procs
6611 //
6612 __kmp_dflt_team_nth = __kmp_avail_proc;
6613 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6614 __kmp_dflt_team_nth ) );
6615#endif /* KMP_DFLT_NTH_CORES */
6616 }
6617
6618 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6619 __kmp_dflt_team_nth = KMP_MIN_NTH;
6620 }
6621 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6622 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6623 }
6624
6625 //
6626 // There's no harm in continuing if the following check fails,
6627 // but it indicates an error in the previous logic.
6628 //
6629 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6630
6631 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6632 //
6633 // Run through the __kmp_threads array and set the num threads icv
6634 // for each root thread that is currently registered with the RTL
6635 // (which has not already explicitly set its nthreads-var with a
6636 // call to omp_set_num_threads()).
6637 //
6638 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6639 kmp_info_t *thread = __kmp_threads[ i ];
6640 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006641 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006642
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006643 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006644 }
6645 }
6646 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6647 __kmp_dflt_team_nth) );
6648
6649#ifdef KMP_ADJUST_BLOCKTIME
6650 /* Adjust blocktime to zero if necessary */
6651 /* now that __kmp_avail_proc is set */
6652 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6653 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6654 if ( __kmp_nth > __kmp_avail_proc ) {
6655 __kmp_zero_bt = TRUE;
6656 }
6657 }
6658#endif /* KMP_ADJUST_BLOCKTIME */
6659
6660 /* we have finished middle initialization */
6661 TCW_SYNC_4(__kmp_init_middle, TRUE);
6662
6663 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6664}
6665
6666void
6667__kmp_middle_initialize( void )
6668{
6669 if ( __kmp_init_middle ) {
6670 return;
6671 }
6672 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6673 if ( __kmp_init_middle ) {
6674 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6675 return;
6676 }
6677 __kmp_do_middle_initialize();
6678 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6679}
6680
6681void
6682__kmp_parallel_initialize( void )
6683{
6684 int gtid = __kmp_entry_gtid(); // this might be a new root
6685
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006686 /* synchronize parallel initialization among sibling root threads */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006687 if( TCR_4(__kmp_init_parallel) ) return;
6688 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6689 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6690
6691 /* TODO reinitialization after we have already shut down */
6692 if( TCR_4(__kmp_global.g.g_done) ) {
6693 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6694 __kmp_infinite_loop();
6695 }
6696
6697 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6698 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6699 */
6700 if( !__kmp_init_middle ) {
6701 __kmp_do_middle_initialize();
6702 }
6703
6704 /* begin initialization */
6705 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6706 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6707
6708#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6709 //
6710 // Save the FP control regs.
6711 // Worker threads will set theirs to these values at thread startup.
6712 //
6713 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6714 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6715 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6716#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6717
6718#if KMP_OS_UNIX
6719# if KMP_HANDLE_SIGNALS
6720 /* must be after __kmp_serial_initialize */
6721 __kmp_install_signals( TRUE );
6722# endif
6723#endif
6724
6725 __kmp_suspend_initialize();
6726
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006727#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006728 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6729 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6730 }
6731#else
6732 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6733 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6734 }
6735#endif
6736
6737 if ( __kmp_version ) {
6738 __kmp_print_version_2();
6739 }
6740
Jim Cownie5e8470a2013-09-27 10:38:44 +00006741 /* we have finished parallel initialization */
6742 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6743
6744 KMP_MB();
6745 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6746
6747 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6748}
6749
6750
6751/* ------------------------------------------------------------------------ */
6752
6753void
6754__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6755 kmp_team_t *team )
6756{
6757 kmp_disp_t *dispatch;
6758
6759 KMP_MB();
6760
6761 /* none of the threads have encountered any constructs, yet. */
6762 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006763#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006764 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006765#endif /* KMP_CACHE_MANAGE */
6766 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6767 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006768 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6769 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006770
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006771 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006772#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00006773 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6774#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006775 if( __kmp_env_consistency_check )
6776 __kmp_push_parallel( gtid, team->t.t_ident );
6777
6778 KMP_MB(); /* Flush all pending memory write invalidates. */
6779}
6780
6781void
6782__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6783 kmp_team_t *team )
6784{
6785 if( __kmp_env_consistency_check )
6786 __kmp_pop_parallel( gtid, team->t.t_ident );
6787}
6788
6789int
6790__kmp_invoke_task_func( int gtid )
6791{
6792 int rc;
6793 int tid = __kmp_tid_from_gtid( gtid );
6794 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006795 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006796
6797 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6798#if USE_ITT_BUILD
6799 if ( __itt_stack_caller_create_ptr ) {
6800 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6801 }
6802#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006803#if INCLUDE_SSC_MARKS
6804 SSC_MARK_INVOKING();
6805#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006806
6807#if OMPT_SUPPORT
6808 void *dummy;
6809 void **exit_runtime_p;
6810 ompt_task_id_t my_task_id;
6811 ompt_parallel_id_t my_parallel_id;
6812
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006813 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006814 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6815 ompt_task_info.frame.exit_runtime_frame);
6816 } else {
6817 exit_runtime_p = &dummy;
6818 }
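    // When OMPT is enabled, exit_runtime_p points into the implicit task's OMPT frame info so
    // __kmp_invoke_microtask can record where runtime frames resume; otherwise it targets a
    // throwaway local, and the invocation below needs no extra branching.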
6819
6820#if OMPT_TRACE
6821 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6822 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006823 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006824 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6825 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6826 my_parallel_id, my_task_id);
6827 }
6828#endif
6829#endif
6830
Jonathan Peyton45be4502015-08-11 21:36:41 +00006831 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006832 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6833 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006834 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6835 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006836#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006837 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006838#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006839 );
6840 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006841
Jim Cownie5e8470a2013-09-27 10:38:44 +00006842#if USE_ITT_BUILD
6843 if ( __itt_stack_caller_create_ptr ) {
6844 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6845 }
6846#endif /* USE_ITT_BUILD */
6847 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6848
6849 return rc;
6850}
6851
6852#if OMP_40_ENABLED
6853void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006854__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006855{
 6856 // This routine is called by all master threads in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006857 kmp_info_t *thr = __kmp_threads[ gtid ];
6858 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006859 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006860 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6861 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6862 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006863 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006864 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006865 // Launch the league of teams now, but do not let the workers execute
 6866 // (they wait on the fork barrier until the next parallel region)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006867#if INCLUDE_SSC_MARKS
6868 SSC_MARK_FORKING();
6869#endif
6870 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006871 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006872#if OMPT_SUPPORT
6873 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6874#endif
6875 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006876 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6877 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006878#if INCLUDE_SSC_MARKS
6879 SSC_MARK_JOINING();
6880#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006881
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006882 // AC: last parameter "1" eliminates join barrier which won't work because
6883 // worker threads are in a fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006884 __kmp_join_call( loc, gtid
6885#if OMPT_SUPPORT
6886 , fork_context_intel
6887#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006888 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006889}
6890
6891int
6892__kmp_invoke_teams_master( int gtid )
6893{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006894 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6895 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006896 #if KMP_DEBUG
6897 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6898 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6899 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006900 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6901 __kmp_teams_master( gtid );
6902 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006903 return 1;
6904}
6905#endif /* OMP_40_ENABLED */
6906
6907/* this sets the requested number of threads for the next parallel region
6908 * encountered by this team */
6909/* since this should be enclosed in the forkjoin critical section it
 6910 * should avoid race conditions with asymmetrical nested parallelism */
6911
6912void
6913__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6914{
6915 kmp_info_t *thr = __kmp_threads[gtid];
6916
6917 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006918 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006919}
6920
6921#if OMP_40_ENABLED
6922
6923/* this sets the requested number of teams for the teams region and/or
6924 * the number of threads for the next parallel region encountered */
6925void
6926__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6927{
6928 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006929 KMP_DEBUG_ASSERT(num_teams >= 0);
6930 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006931
6932 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006933 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006934 if( num_teams > __kmp_max_nth ) { // too many teams requested: clamp to the thread limit and warn
6935 if ( !__kmp_reserve_warn ) {
6936 __kmp_reserve_warn = 1;
6937 __kmp_msg(
6938 kmp_ms_warning,
6939 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6940 KMP_HNT( Unset_ALL_THREADS ),
6941 __kmp_msg_null
6942 );
6943 }
6944 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006945 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006946 // Set number of teams (number of threads in the outer "parallel" of the teams)
6947 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6948
6949 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006950 if( num_threads == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006951 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006952 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006953 num_threads = __kmp_avail_proc / num_teams;
6954 if( num_teams * num_threads > __kmp_max_nth ) {
6955 // adjust num_threads w/o warning as it is not user setting
6956 num_threads = __kmp_max_nth / num_teams;
6957 }
6958 } else {
6959 if( num_teams * num_threads > __kmp_max_nth ) {
6960 int new_threads = __kmp_max_nth / num_teams;
6961 if ( !__kmp_reserve_warn ) { // user asked for too many threads
6962 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
6963 __kmp_msg(
6964 kmp_ms_warning,
6965 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6966 KMP_HNT( Unset_ALL_THREADS ),
6967 __kmp_msg_null
6968 );
6969 }
6970 num_threads = new_threads;
6971 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006972 }
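    // Worked example with made-up numbers: if __kmp_max_nth is 64 and num_teams is 8, an
    // explicit num_threads request of 16 exceeds the limit and is clamped to 64 / 8 == 8.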
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006973 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006974}
6975
6976
6977//
6978// Set the proc_bind var to use in the following parallel region.
6979//
6980void
6981__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
6982{
6983 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006984 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006985}
6986
6987#endif /* OMP_40_ENABLED */
6988
6989/* Launch the worker threads into the microtask. */
6990
6991void
6992__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
6993{
6994 kmp_info_t *this_thr = __kmp_threads[gtid];
6995
6996#ifdef KMP_DEBUG
6997 int f;
6998#endif /* KMP_DEBUG */
6999
7000 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007001 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007002 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7003 KMP_MB(); /* Flush all pending memory write invalidates. */
7004
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007005 team->t.t_construct = 0; /* no single directives seen yet */
7006 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007007
7008 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007009 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007010 if ( team->t.t_max_nproc > 1 ) {
7011 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00007012 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007013 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007014#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007015 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7016#endif
7017 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007018 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007019 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007020#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007021 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7022#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007023 }
7024
7025 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007026 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007027
7028#ifdef KMP_DEBUG
7029 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7030 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7031 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7032 }
7033#endif /* KMP_DEBUG */
7034
7035 /* release the worker threads so they may begin working */
7036 __kmp_fork_barrier( gtid, 0 );
7037}
7038
7039
7040void
7041__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7042{
7043 kmp_info_t *this_thr = __kmp_threads[gtid];
7044
7045 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007046 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007047 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7048 KMP_MB(); /* Flush all pending memory write invalidates. */
7049
7050 /* Join barrier after fork */
7051
7052#ifdef KMP_DEBUG
7053 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7054 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7055 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7056 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7057 __kmp_print_structure();
7058 }
7059 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7060 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7061#endif /* KMP_DEBUG */
7062
7063 __kmp_join_barrier( gtid ); /* wait for everyone */
7064
7065 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007066 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007067}
7068
7069
7070/* ------------------------------------------------------------------------ */
7071/* ------------------------------------------------------------------------ */
7072
7073#ifdef USE_LOAD_BALANCE
7074
7075//
7076// Return the worker threads actively spinning in the hot team, if we
7077// are at the outermost level of parallelism. Otherwise, return 0.
7078//
7079static int
7080__kmp_active_hot_team_nproc( kmp_root_t *root )
7081{
7082 int i;
7083 int retval;
7084 kmp_team_t *hot_team;
7085
7086 if ( root->r.r_active ) {
7087 return 0;
7088 }
7089 hot_team = root->r.r_hot_team;
7090 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7091 return hot_team->t.t_nproc - 1; // Don't count master thread
7092 }
7093
7094 //
7095 // Skip the master thread - it is accounted for elsewhere.
7096 //
7097 retval = 0;
7098 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7099 if ( hot_team->t.t_threads[i]->th.th_active ) {
7100 retval++;
7101 }
7102 }
7103 return retval;
7104}
7105
7106//
7107// Perform an automatic adjustment to the number of
7108// threads used by the next parallel region.
7109//
7110static int
7111__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7112{
7113 int retval;
7114 int pool_active;
7115 int hot_team_active;
7116 int team_curr_active;
7117 int system_active;
7118
7119 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7120 root, set_nproc ) );
7121 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007122 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007123 KMP_DEBUG_ASSERT( set_nproc > 1 );
7124
7125 if ( set_nproc == 1) {
7126 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7127 return 1;
7128 }
7129
7130 //
7131 // Threads that are active in the thread pool, active in the hot team
7132 // for this particular root (if we are at the outer par level), and
7133 // the currently executing thread (to become the master) are available
7134 // to add to the new team, but are currently contributing to the system
7135 // load, and must be accounted for.
7136 //
7137 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7138 hot_team_active = __kmp_active_hot_team_nproc( root );
7139 team_curr_active = pool_active + hot_team_active + 1;
7140
7141 //
7142 // Check the system load.
7143 //
7144 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7145 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7146 system_active, pool_active, hot_team_active ) );
7147
7148 if ( system_active < 0 ) {
7149 //
7150 // There was an error reading the necessary info from /proc,
7151 // so use the thread limit algorithm instead. Once we set
7152 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7153 // we shouldn't wind up getting back here.
7154 //
7155 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7156 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7157
7158 //
7159 // Make this call behave like the thread limit algorithm.
7160 //
7161 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7162 : root->r.r_hot_team->t.t_nproc);
7163 if ( retval > set_nproc ) {
7164 retval = set_nproc;
7165 }
7166 if ( retval < KMP_MIN_NTH ) {
7167 retval = KMP_MIN_NTH;
7168 }
7169
7170 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7171 return retval;
7172 }
7173
7174 //
7175 // There is a slight delay in the load balance algorithm in detecting
7176 // new running procs. The real system load at this instant should be
7177 // at least as large as the #active omp thread that are available to
7178 // add to the team.
7179 //
7180 if ( system_active < team_curr_active ) {
7181 system_active = team_curr_active;
7182 }
7183 retval = __kmp_avail_proc - system_active + team_curr_active;
7184 if ( retval > set_nproc ) {
7185 retval = set_nproc;
7186 }
7187 if ( retval < KMP_MIN_NTH ) {
7188 retval = KMP_MIN_NTH;
7189 }
7190
7191 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7192 return retval;
7193} // __kmp_load_balance_nproc()
7194
7195#endif /* USE_LOAD_BALANCE */
7196
Jim Cownie5e8470a2013-09-27 10:38:44 +00007197/* ------------------------------------------------------------------------ */
7198/* ------------------------------------------------------------------------ */
7199
7200/* NOTE: this is called with the __kmp_init_lock held */
7201void
7202__kmp_cleanup( void )
7203{
7204 int f;
7205
7206 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7207
7208 if (TCR_4(__kmp_init_parallel)) {
7209#if KMP_HANDLE_SIGNALS
7210 __kmp_remove_signals();
7211#endif
7212 TCW_4(__kmp_init_parallel, FALSE);
7213 }
7214
7215 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007216#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007217 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007218#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007219 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007220 TCW_4(__kmp_init_middle, FALSE);
7221 }
7222
7223 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7224
7225 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007226 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007227 __kmp_init_serial = FALSE;
7228 }
7229
7230 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7231 if ( __kmp_root[ f ] != NULL ) {
7232 __kmp_free( __kmp_root[ f ] );
7233 __kmp_root[ f ] = NULL;
7234 }
7235 }
7236 __kmp_free( __kmp_threads );
 7237 // __kmp_threads and __kmp_root were allocated at once, as a single block, so there is no need
 7238 // to free __kmp_root separately.
7239 __kmp_threads = NULL;
7240 __kmp_root = NULL;
7241 __kmp_threads_capacity = 0;
7242
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007243#if KMP_USE_DYNAMIC_LOCK
7244 __kmp_cleanup_indirect_user_locks();
7245#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007246 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007247#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007248
Alp Toker98758b02014-03-02 04:12:06 +00007249 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007250 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7251 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007252 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007253
7254 #if KMP_USE_ADAPTIVE_LOCKS
7255 #if KMP_DEBUG_ADAPTIVE_LOCKS
7256 __kmp_print_speculative_stats();
7257 #endif
7258 #endif
7259 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7260 __kmp_nested_nth.nth = NULL;
7261 __kmp_nested_nth.size = 0;
7262 __kmp_nested_nth.used = 0;
7263
7264 __kmp_i18n_catclose();
7265
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007266#if KMP_STATS_ENABLED
7267 __kmp_accumulate_stats_at_exit();
7268 __kmp_stats_list.deallocate();
7269#endif
7270
Jim Cownie5e8470a2013-09-27 10:38:44 +00007271 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7272}
7273
7274/* ------------------------------------------------------------------------ */
7275/* ------------------------------------------------------------------------ */
7276
7277int
7278__kmp_ignore_mppbeg( void )
7279{
7280 char *env;
7281
7282 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7283 if (__kmp_str_match_false( env ))
7284 return FALSE;
7285 }
7286 // By default __kmpc_begin() is no-op.
7287 return TRUE;
7288}
7289
7290int
7291__kmp_ignore_mppend( void )
7292{
7293 char *env;
7294
7295 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7296 if (__kmp_str_match_false( env ))
7297 return FALSE;
7298 }
7299 // By default __kmpc_end() is no-op.
7300 return TRUE;
7301}
7302
7303void
7304__kmp_internal_begin( void )
7305{
7306 int gtid;
7307 kmp_root_t *root;
7308
7309 /* this is a very important step as it will register new sibling threads
7310 * and assign these new uber threads a new gtid */
7311 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007312 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007313 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7314
7315 if( root->r.r_begin ) return;
7316 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7317 if( root->r.r_begin ) {
7318 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7319 return;
7320 }
7321
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007322 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007323
7324 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7325}
7326
7327
7328/* ------------------------------------------------------------------------ */
7329/* ------------------------------------------------------------------------ */
7330
7331void
7332__kmp_user_set_library (enum library_type arg)
7333{
7334 int gtid;
7335 kmp_root_t *root;
7336 kmp_info_t *thread;
7337
7338 /* first, make sure we are initialized so we can get our gtid */
7339
7340 gtid = __kmp_entry_gtid();
7341 thread = __kmp_threads[ gtid ];
7342
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007343 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007344
7345 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7346 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7347 KMP_WARNING( SetLibraryIncorrectCall );
7348 return;
7349 }
7350
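    // The turnaround and throughput cases install the same nproc default here; they differ
    // only in the behaviour selected afterwards via __kmp_aux_set_library()/__kmp_change_library().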
7351 switch ( arg ) {
7352 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007353 thread->th.th_set_nproc = 0;
7354 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007355 break;
7356 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007357 thread->th.th_set_nproc = 0;
7358 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007359 break;
7360 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007361 thread->th.th_set_nproc = 0;
7362 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007363 break;
7364 default:
7365 KMP_FATAL( UnknownLibraryType, arg );
7366 }
7367
7368 __kmp_aux_set_library ( arg );
7369}
7370
7371void
7372__kmp_aux_set_stacksize( size_t arg )
7373{
7374 if (! __kmp_init_serial)
7375 __kmp_serial_initialize();
7376
7377#if KMP_OS_DARWIN
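    // Round the requested size up to the next 4 KB (0x1000-byte) boundary.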
7378 if (arg & (0x1000 - 1)) {
7379 arg &= ~(0x1000 - 1);
7380 if(arg + 0x1000) /* check for overflow if we round up */
7381 arg += 0x1000;
7382 }
7383#endif
7384 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7385
7386 /* only change the default stacksize before the first parallel region */
7387 if (! TCR_4(__kmp_init_parallel)) {
7388 size_t value = arg; /* argument is in bytes */
7389
7390 if (value < __kmp_sys_min_stksize )
7391 value = __kmp_sys_min_stksize ;
7392 else if (value > KMP_MAX_STKSIZE)
7393 value = KMP_MAX_STKSIZE;
7394
7395 __kmp_stksize = value;
7396
7397 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7398 }
7399
7400 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7401}
7402
7403/* set the behaviour of the runtime library */
7404/* TODO this can cause some odd behaviour with sibling parallelism... */
7405void
7406__kmp_aux_set_library (enum library_type arg)
7407{
7408 __kmp_library = arg;
7409
7410 switch ( __kmp_library ) {
7411 case library_serial :
7412 {
7413 KMP_INFORM( LibraryIsSerial );
7414 (void) __kmp_change_library( TRUE );
7415 }
7416 break;
7417 case library_turnaround :
7418 (void) __kmp_change_library( TRUE );
7419 break;
7420 case library_throughput :
7421 (void) __kmp_change_library( FALSE );
7422 break;
7423 default:
7424 KMP_FATAL( UnknownLibraryType, arg );
7425 }
7426}
7427
7428/* ------------------------------------------------------------------------ */
7429/* ------------------------------------------------------------------------ */
7430
7431void
7432__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7433{
7434 int blocktime = arg; /* argument is in milliseconds */
7435 int bt_intervals;
7436 int bt_set;
7437
7438 __kmp_save_internal_controls( thread );
7439
7440 /* Normalize and set blocktime for the teams */
7441 if (blocktime < KMP_MIN_BLOCKTIME)
7442 blocktime = KMP_MIN_BLOCKTIME;
7443 else if (blocktime > KMP_MAX_BLOCKTIME)
7444 blocktime = KMP_MAX_BLOCKTIME;
7445
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007446 set__blocktime_team( thread->th.th_team, tid, blocktime );
7447 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007448
7449 /* Calculate and set blocktime intervals for the teams */
7450 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
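    // bt_intervals expresses the blocktime as a count of monitor wakeup intervals.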
7451
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007452 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7453 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007454
7455 /* Set whether blocktime has been set to "TRUE" */
7456 bt_set = TRUE;
7457
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007458 set__bt_set_team( thread->th.th_team, tid, bt_set );
7459 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007460 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7461 __kmp_gtid_from_tid(tid, thread->th.th_team),
7462 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
7463}
7464
7465void
7466__kmp_aux_set_defaults(
7467 char const * str,
7468 int len
7469) {
7470 if ( ! __kmp_init_serial ) {
7471 __kmp_serial_initialize();
7472 };
7473 __kmp_env_initialize( str );
7474
7475 if (__kmp_settings
7476#if OMP_40_ENABLED
7477 || __kmp_display_env || __kmp_display_env_verbose
7478#endif // OMP_40_ENABLED
7479 ) {
7480 __kmp_env_print();
7481 }
7482} // __kmp_aux_set_defaults
7483
7484/* ------------------------------------------------------------------------ */
7485
7486/*
7487 * internal fast reduction routines
7488 */
7489
Jim Cownie5e8470a2013-09-27 10:38:44 +00007490PACKED_REDUCTION_METHOD_T
7491__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7492 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7493 kmp_critical_name *lck )
7494{
7495
7496 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7497 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7498 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
7499 // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT.
7500
7501 PACKED_REDUCTION_METHOD_T retval;
7502
7503 int team_size;
7504
7505 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7506 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7507
7508 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7509 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7510
7511 retval = critical_reduce_block;
7512
 7513 team_size = __kmp_get_team_num_threads( global_tid ); // another way of getting the team size ( with 1 dynamic dereference ) is slower
7514
7515 if( team_size == 1 ) {
7516
7517 retval = empty_reduce_block;
7518
7519 } else {
7520
7521 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7522 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7523
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007524 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007525
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007526 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007527
7528 int teamsize_cutoff = 4;
7529
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007530#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7531 if( __kmp_mic_type != non_mic ) {
7532 teamsize_cutoff = 8;
7533 }
7534#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007535 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007536 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007537 if ( atomic_available ) {
7538 retval = atomic_reduce_block;
7539 }
7540 } else {
7541 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7542 }
7543 } else if ( atomic_available ) {
7544 retval = atomic_reduce_block;
7545 }
7546 #else
7547 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007548 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007549
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007550 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH
Jim Cownie5e8470a2013-09-27 10:38:44 +00007551
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007552 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007553
Jim Cownie5e8470a2013-09-27 10:38:44 +00007554 // basic tuning
7555
7556 if( atomic_available ) {
7557 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7558 retval = atomic_reduce_block;
7559 }
7560 } // otherwise: use critical section
7561
7562 #elif KMP_OS_DARWIN
7563
Jim Cownie5e8470a2013-09-27 10:38:44 +00007564 if( atomic_available && ( num_vars <= 3 ) ) {
7565 retval = atomic_reduce_block;
7566 } else if( tree_available ) {
7567 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7568 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7569 }
7570 } // otherwise: use critical section
7571
7572 #else
7573 #error "Unknown or unsupported OS"
7574 #endif
7575
7576 #else
7577 #error "Unknown or unsupported architecture"
7578 #endif
7579
7580 }
7581
Jim Cownie5e8470a2013-09-27 10:38:44 +00007582 // KMP_FORCE_REDUCTION
7583
Andrey Churbanovec23a952015-08-17 10:12:12 +00007584 // If the team is serialized (team_size == 1), ignore the forced reduction
7585 // method and stay with the unsynchronized method (empty_reduce_block)
7586 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007587
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007588 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007589
7590 int atomic_available, tree_available;
7591
7592 switch( ( forced_retval = __kmp_force_reduction_method ) )
7593 {
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007594 case critical_reduce_block:
Jim Cownie5e8470a2013-09-27 10:38:44 +00007595 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007596 break;
7597
7598 case atomic_reduce_block:
7599 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007600 if( ! atomic_available ) {
7601 KMP_WARNING(RedMethodNotSupported, "atomic");
7602 forced_retval = critical_reduce_block;
7603 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007604 break;
7605
7606 case tree_reduce_block:
7607 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007608 if( ! tree_available ) {
7609 KMP_WARNING(RedMethodNotSupported, "tree");
7610 forced_retval = critical_reduce_block;
7611 } else {
7612 #if KMP_FAST_REDUCTION_BARRIER
7613 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7614 #endif
7615 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007616 break;
7617
7618 default:
7619 KMP_ASSERT( 0 ); // "unsupported method specified"
7620 }
7621
7622 retval = forced_retval;
7623 }
7624
7625 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7626
7627 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7628 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7629
7630 return ( retval );
7631}
7632
7633// this function is for testing set/get/determine reduce method
7634kmp_int32
7635__kmp_get_reduce_method( void ) {
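    // PACKED_REDUCTION_METHOD_T keeps the barrier type in the low 8 bits and the method id
    // above them (see kmp.h), so shifting right by 8 recovers just the method.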
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007636 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007637}
7638
7639/* ------------------------------------------------------------------------ */