/*
 * kmp_runtime.c -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif


#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

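/* Note: this is the bookkeeping descriptor used for the monitor thread; it is
   kept in its own global rather than in the __kmp_threads[] array. */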
kmp_info_t __kmp_monitor;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

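/* Note: grows the __kmp_threads / __kmp_root arrays; returns the number of slots
   actually added, which may be less than nWish (see the capacity handling in
   __kmp_reserve_threads() below). */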
static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread. */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */

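/* Lookup order used below: __kmp_gtid_mode >= 3 reads the __kmp_gtid thread-local
   variable, mode >= 2 uses the keyed TLS value, and otherwise the gtid is recovered
   by scanning the registered threads' stack extents for the current stack address. */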
int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t **other_threads;
    size_t stack_data;
    char *stack_addr;
    size_t stack_size;
    char *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
       parallel region, made it return KMP_GTID_DNE to force serial_initialize by
       caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
       __kmp_init_gtid for this to work. */

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    /*
        ATT: The code below is a source of potential bugs due to unsynchronized access to
        the __kmp_threads array. For example:
        1. Current thread loads other_threads[i] into thr and checks that it is non-NULL.
        2. Current thread is suspended by the OS.
        3. Another thread unregisters and finishes (debug versions of free() may fill memory
           with something like 0xEF).
        4. Current thread is resumed.
        5. Current thread reads junk from *thr.
        TODO: Fix it.
        --ln
    */

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated */
                /* stack size is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

    /* if we haven't been assigned a gtid, then return the code */
    if( i<0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
          other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}

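/* Same lookup as above, but if the calling thread has no gtid yet it is treated as
   a new root: serial initialization is performed if needed and the thread is
   registered via __kmp_register_root(). */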
int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
        /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
    }

    KMP_DEBUG_ASSERT( gtid >=0 );

    return gtid;
}

/* caller must hold forkjoin_lock */
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                     "th_%s stack (%s)", "mon",
                                     ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                     "th_%d stack (%s)", gtid,
                                     ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                            (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                            "th_%d stack (overlapped)",
                            __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}


/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */

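/* Park the calling thread in a yielding spin that never exits; used on the abort
   paths below where the thread must not continue running user code. */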
void
__kmp_infinite_loop( void )
{
    static int done = FALSE;

    while (! done) {
        KMP_YIELD( 1 );
    }
}

#define MAX_MESSAGE 512

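/* Print one storage map line to kmp_err under __kmp_stdio_lock; the caller's format
   and arguments are appended to a fixed "OMP storage map: <p1> <p2> <size>" prefix.
   With KMP_PRINT_DATA_PLACEMENT the host memory node of the pages in [p1,p2] is also
   reported when it can be determined. */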
void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];
    va_list ap;

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    int node;
    if(gtid >= 0) {
        if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
            if( __kmp_storage_map_verbose ) {
                node = __kmp_get_host_node(p1);
                if(node < 0)  /* doesn't work, so don't try this next time */
                    __kmp_storage_map_verbose = FALSE;
                else {
                    char *last;
                    int lastNode;
                    int localProc = __kmp_get_cpu_from_gtid(gtid);

                    p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
                    p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
                    if(localProc >= 0)
                        __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
                    else
                        __kmp_printf_no_lock(" GTID %d\n", gtid);
# if KMP_USE_PRCTL
/* The more elaborate format is disabled for now because of the prctl hanging bug. */
                    do {
                        last = p1;
                        lastNode = node;
                        /* This loop collates adjacent pages with the same host node. */
                        do {
                            (char*)p1 += PAGE_SIZE;
                        } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                        __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
                                             (char*)p1 - 1, lastNode);
                    } while(p1 <= p2);
# else
                    __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                                         (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
                    if(p1 < p2) {
                        __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                                             (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
                    }
# endif
                }
            }
        } else
            __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
    }
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}

void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}

void
__kmp_abort_process()
{

    // Later threads may stall here, but that's ok because abort() will kill them.
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }; // if

    if ( KMP_OS_WINDOWS ) {
        // Let other threads know of abnormal termination and prevent deadlock
        // if abort happened during library initialization or shutdown
        __kmp_global.g.g_abort = SIGABRT;

        /*
            On Windows* OS, by default abort() causes a pop-up error box, which stalls nightly testing.
            Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
            works well, but this function is not available in VS7 (this is not a problem for the DLL, but
            it is a problem for the static OpenMP RTL). SetErrorMode (and so, the timelimit utility) does
            not help, at least in some versions of the MS C RTL.

            It seems the following sequence is the only way to simulate abort() and avoid the pop-up error
            box.
        */
        raise( SIGABRT );
        _exit( 3 );    // Just in case, if signal ignored, exit anyway.
    } else {
        abort();
    }; // if

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

} // __kmp_abort_process

void
__kmp_abort_thread( void )
{
    // TODO: Eliminate g_abort global variable and this function.
    // In case of abort just call abort(), it will kill all the threads.
    __kmp_infinite_loop();
} // __kmp_abort_thread

/* ------------------------------------------------------------------------ */

/*
 * Print out the storage map for the major kmp_info_t thread data structures
 * that are allocated together.
 */

static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                             "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                             "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                             sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                             &thr->th.th_bar[bs_plain_barrier+1],
                             sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                             &thr->th.th_bar[bs_forkjoin_barrier+1],
                             sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                             &thr->th.th_bar[bs_reduction_barrier+1],
                             sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
    #endif // KMP_FAST_REDUCTION_BARRIER
}

/*
 * Print out the storage map for the major kmp_team_t team data structures
 * that are allocated together.
 */

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                             header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                             sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                             sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                             sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                             sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
    #endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                             sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                             sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                             sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                             header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                             sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    // TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // The OS terminates the other threads (except the one calling ProcessExit or FreeLibrary).
    // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
    // However, in fact some threads can still be alive here, although they are about to be terminated.
    // The threads in the array with ds_thread==0 are the most suspicious.
    // Actually, it may not be safe to access the __kmp_threads[].

    // TODO: does it make sense to check __kmp_roots[] ?

    // Let's check that there are no other live threads registered with the OMP lib.
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Assume that I'm alone.

    // Now it might be probably safe to check and reset locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
    #ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
    #endif // KMP_DEBUG
}

BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
    //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));

            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
            {
                // lpReserved is used for telling the difference:
                //   lpReserved == NULL when FreeLibrary() was called,
                //   lpReserved != NULL when the process terminates.
                // When FreeLibrary() is called, worker threads remain alive.
                // So they will release the forkjoin lock by themselves.
                // When the process terminates, worker threads disappear triggering
                // the problem of unreleased forkjoin lock as described below.

                // A worker thread can take the forkjoin lock.
                // The problem comes up if that worker thread becomes dead
                // before it releases the forkjoin lock.
                // The forkjoin lock remains taken, while the thread
                // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
                // will try to take the forkjoin lock and will always fail,
                // so that the application will never finish [normally].
                // This scenario is possible if __kmpc_end() has not been executed.
                // It looks like it's not a corner case, but common cases:
                //   - the main function was compiled by an alternative compiler;
                //   - the main function was compiled by icl but without /Qopenmp (application with plugins);
                //   - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
                //   - alive foreign thread prevented __kmpc_end from doing cleanup.

                // This is a hack to work around the problem.
                // TODO: !!! to figure out something better.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));

            /* if we wanted to register new siblings all the time here call
             * __kmp_get_gtid(); */
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */


/* ------------------------------------------------------------------------ */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  // check whether KMP_LIBRARY=throughput (even init count)

    if (status) {
        __kmp_yield_init |= 1;  // throughput => turnaround (odd init count)
    }
    else {
        __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
    }

    return old_status;  // return previous setting of whether KMP_LIBRARY=throughput
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* __kmp_parallel_deo --
 * Wait until it's our turn.
 */
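/* (In a non-serialized parallel ordered region this spins via KMP_WAIT_YIELD until
   team->t.t_ordered.dt.t_value reaches the calling thread's tid; __kmp_parallel_dxo
   below then advances that value to pass the turn to the next thread.) */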
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_MB();
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
        KMP_MB();
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo --
 * Signal the next task.
 */

void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        KMP_MB();       /* Flush all pending memory write invalidates. */

        /* use the tid of the next thread in this team */
        /* TODO: replace with general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB();       /* Flush all pending memory write invalidates. */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* The BARRIER for a SINGLE process section is always explicit */

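/* Returns nonzero if the calling thread "wins" the single region by atomically
   advancing team->t.t_construct to its own this_construct counter; losers get zero
   and skip the block. In a serialized team the caller always wins. */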
int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the
           single block
        */
        /* TODO: Should this be acquire or release? */
        status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                             th->th.th_local.this_construct);
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        {   // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}


/*
 * Determine whether we can go parallel or must use a serialized parallel region, and
 * how many threads we can use.
 * set_nthreads is the number of threads requested for the team.
 * Returns 1 if we should serialize or only use one thread,
 * otherwise the number of threads to use.
 * The forkjoin lock is held by the caller.
 */
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
   int master_tid, int set_nthreads
#if OMP_40_ENABLED
  , int enter_teams
#endif /* OMP_40_ENABLED */
)
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    //
    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    //
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
          : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
        else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                  master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                  master_tid, new_nthreads ));
            }
        }
    }
    else {
        KMP_ASSERT( 0 );
    }

    //
    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    //
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        //
        // If dyn-var is false, emit a 1-time warning.
        //
        if ( ! get__dynamic_2( parent_team, master_tid )
          && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
          master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    //
    // Check if the threads array is large enough, or needs expanding.
    //
    // See comment in __kmp_register_root() about the adjustment if
    // __kmp_threads[0] == NULL.
    //
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > capacity ) {
        //
        // Expand the threads array.
        //
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            //
            // The threads array was not expanded enough.
            //
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            //
            // If dyn-var is false, emit a 1-time warning.
            //
            if ( ! get__dynamic_2( parent_team, master_tid )
              && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
                else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
          __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
      __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within critical section forkjoin */

static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int i;
    int use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
    KMP_MB();

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1; // index in array of hot teams
        if( master_th->th.th_teams_microtask ) {            // are we inside the teams?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level; // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level; // level was not increased in teams construct for team_of_workers before the parallel
            }            // team->t.t_level will be increased inside parallel
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1; // the team is ready to use
            } else {
                use_hot_team = 0; // AC: threads are not allocated yet
                hot_teams[level].hot_team = team; // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        } else {
            use_hot_team = 0;
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;
#endif
            { // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }; // for b
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif

    }

    KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
//
// Propagate any changes to the floating point control registers out to the team.
// We try to avoid unnecessary writes to the relevant cache line in the team structure,
// so we don't make changes unless they are needed.
//
inline static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        // There is no point looking at t_fp_control_saved here.
        // If it is TRUE, we still have to update the values if they are different from those we now have.
        // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
        // that the values in the team are the same as those we have.
        // So, this code achieves what we need whether or not t_fp_control_saved is true.
        // By checking whether the value needs updating we avoid unnecessary writes that would put the
        // cache-line into a written state, causing all threads in the team to have to read it again.
        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            team->t.t_x87_fpu_control_word = x87_fpu_control_word;
        }
        if ( team->t.t_mxcsr != mxcsr ) {
            team->t.t_mxcsr = mxcsr;
        }
        // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
        // So we must ensure it is correct.
        if (!team->t.t_fp_control_saved) {
            team->t.t_fp_control_saved = TRUE;
        }
    }
    else {
        // Similarly here. Don't write to this cache-line in the team structure unless we have to.
        if (team->t.t_fp_control_saved)
            team->t.t_fp_control_saved = FALSE;
    }
}

// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed in the team by
        // the parallel region that we are exiting.
        //
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration

/*
 * Run a parallel region that has been serialized, so runs only in a team of the single master thread.
 */
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    /* Skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        //
        // No proc_bind clause was specified, so use the current value
        // of proc-bind-var for this parallel region.
        //
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    //
    // Reset for next parallel region
    //
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used
             * TODO: increase performance by making these locks more specific */
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                           ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                           proc_bind,
#endif
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );


            /* TODO the above breaks the requirement that if we run out of
             * resources, then we can still guarantee that serialized teams
             * are ok, since we may need to allocate a new one */
        } else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
1287 global_tid, this_thr->th.th_current_task ) );
1288 KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
1289 this_thr->th.th_current_task->td_flags.executing = 0;
1290
1291 __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
1292
1293 /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
1294 each serialized task represented by team->t.t_serialized? */
1295 copy_icvs(
1296 & this_thr->th.th_current_task->td_icvs,
1297 & this_thr->th.th_current_task->td_parent->td_icvs );
1298
1299 // Thread value exists in the nested nthreads array for the next nested level
1300 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1301 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1302 }
1303
1304#if OMP_40_ENABLED
1305 if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
1306 this_thr->th.th_current_task->td_icvs.proc_bind
1307 = __kmp_nested_proc_bind.bind_types[ level + 1 ];
1308 }
1309#endif /* OMP_40_ENABLED */
1310
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001311#if USE_DEBUGGER
1312 serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
1313#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001314 this_thr->th.th_info.ds.ds_tid = 0;
1315
1316 /* set thread cache values */
1317 this_thr->th.th_team_nproc = 1;
1318 this_thr->th.th_team_master = this_thr;
1319 this_thr->th.th_team_serialized = 1;
1320
1321 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1322 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1323
1324 propagateFPControl (serial_team);
1325
1326 /* check if we need to allocate dispatch buffers stack */
1327 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1328 if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
1329 serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
1330 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1331 }
1332 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1333
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001334#if OMPT_SUPPORT
1335 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1336 __ompt_team_assign_id(serial_team, ompt_parallel_id);
1337#endif
1338
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001339 KMP_MB();
1340
1341 } else {
1342 /* this serialized team is already being used,
1343 * that's fine, just add another nested level */
1344 KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
1345 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1346 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1347 ++ serial_team->t.t_serialized;
1348 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1349
1350 // Nested level will be an index in the nested nthreads array
1351 int level = this_thr->th.th_team->t.t_level;
1352 // Thread value exists in the nested nthreads array for the next nested level
1353 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1354 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1355 }
1356 serial_team->t.t_level++;
1357 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
1358 global_tid, serial_team, serial_team->t.t_level ) );
1359
1360 /* allocate/push dispatch buffers stack */
1361 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1362 {
1363 dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
1364 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1365 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1366 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1367 }
1368 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1369
1370 KMP_MB();
1371 }
1372
1373 if ( __kmp_env_consistency_check )
1374 __kmp_push_parallel( global_tid, NULL );
1375
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001376}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001377
Jim Cownie5e8470a2013-09-27 10:38:44 +00001378/* most of the work for a fork */
1379/* return true if we really went parallel, false if serialized */
1380int
1381__kmp_fork_call(
1382 ident_t * loc,
1383 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001384 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001385 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001386#if OMPT_SUPPORT
1387 void *unwrapped_task,
1388#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001389 microtask_t microtask,
1390 launch_t invoker,
1391/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001392#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001393 va_list * ap
1394#else
1395 va_list ap
1396#endif
1397 )
1398{
1399 void **argv;
1400 int i;
1401 int master_tid;
1402 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001403 kmp_team_t *team;
1404 kmp_team_t *parent_team;
1405 kmp_info_t *master_th;
1406 kmp_root_t *root;
1407 int nthreads;
1408 int master_active;
1409 int master_set_numthreads;
1410 int level;
1411#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001412 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001413 int teams_level;
1414#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001415#if KMP_NESTED_HOT_TEAMS
1416 kmp_hot_team_ptr_t **p_hot_teams;
1417#endif
1418 { // KMP_TIME_BLOCK
Jonathan Peyton45be4502015-08-11 21:36:41 +00001419 KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
1420 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001421
1422 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001423 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
1424 /* Some systems prefer the stack for the root thread(s) to start with */
1425 /* some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001426 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001427 /* These 2 lines below are so this does not get optimized out */
1428 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1429 __kmp_stkpadding += (short)((kmp_int64)dummy);
1430 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001431
1432 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001433 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001434 if( ! TCR_4(__kmp_init_parallel) )
1435 __kmp_parallel_initialize();
1436
1437 /* setup current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001438 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1439 parent_team = master_th->th.th_team;
1440 master_tid = master_th->th.th_info.ds.ds_tid;
1441 master_this_cons = master_th->th.th_local.this_construct;
1442 root = master_th->th.th_root;
1443 master_active = root->r.r_active;
1444 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001445
1446#if OMPT_SUPPORT
1447 ompt_parallel_id_t ompt_parallel_id;
1448 ompt_task_id_t ompt_task_id;
1449 ompt_frame_t *ompt_frame;
1450 ompt_task_id_t my_task_id;
1451 ompt_parallel_id_t my_parallel_id;
1452
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001453 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001454 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1455 ompt_task_id = __ompt_get_task_id_internal(0);
1456 ompt_frame = __ompt_get_task_frame_internal(0);
1457 }
1458#endif
1459
Jim Cownie5e8470a2013-09-27 10:38:44 +00001460 // Nested level will be an index in the nested nthreads array
1461 level = parent_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001462 active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001463#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001464 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1465#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001466#if KMP_NESTED_HOT_TEAMS
1467 p_hot_teams = &master_th->th.th_hot_teams;
1468 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1469 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1470 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1471 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1472 (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
1473 }
1474#endif
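    // Note: "hot teams" keep the worker threads of nested teams alive between
    // parallel regions (up to __kmp_hot_teams_max_level, typically controlled by
    // the KMP_HOT_TEAMS_MAX_LEVEL setting), so repeated nested regions avoid the
    // thread create/destroy cost. The per-thread array is allocated lazily here.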
Jim Cownie5e8470a2013-09-27 10:38:44 +00001475
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001476#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001477 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001478 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1479 int team_size = master_set_numthreads;
1480
1481 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1482 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001483 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001484 }
1485#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001486
Jim Cownie5e8470a2013-09-27 10:38:44 +00001487 master_th->th.th_ident = loc;
1488
1489#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001490 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001491 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
1492 // AC: This is the start of a parallel region that is nested inside a teams construct.
1493 // The team is already in place (hot), and all workers are waiting at the fork barrier.
1494 // No lock is needed to do the small amount of team initialization and then release the workers.
1495 parent_team->t.t_ident = loc;
Jonathan Peyton7cf08d42016-06-16 18:47:38 +00001496 __kmp_alloc_argv_entries( argc, parent_team, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001497 parent_team->t.t_argc = argc;
1498 argv = (void**)parent_team->t.t_argv;
1499 for( i=argc-1; i >= 0; --i )
1500/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001501#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001502 *argv++ = va_arg( *ap, void * );
1503#else
1504 *argv++ = va_arg( ap, void * );
1505#endif
1506 /* Increment our nested depth levels, but do not increase the serialization */
1507 if ( parent_team == master_th->th.th_serial_team ) {
1508 // AC: we are in serialized parallel
1509 __kmpc_serialized_parallel(loc, gtid);
1510 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
1511 parent_team->t.t_serialized--; // AC: need this in order enquiry functions
1512 // work correctly, will restore at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001513
1514#if OMPT_SUPPORT
1515 void *dummy;
1516 void **exit_runtime_p;
1517
1518 ompt_lw_taskteam_t lw_taskteam;
1519
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001520 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001521 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1522 unwrapped_task, ompt_parallel_id);
1523 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1524 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1525
1526 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1527
1528#if OMPT_TRACE
1529 /* OMPT implicit task begin */
1530 my_task_id = lw_taskteam.ompt_task_info.task_id;
1531 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001532 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001533 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1534 my_parallel_id, my_task_id);
1535 }
1536#endif
1537
1538 /* OMPT state */
1539 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1540 } else {
1541 exit_runtime_p = &dummy;
1542 }
1543#endif
1544
Jonathan Peyton45be4502015-08-11 21:36:41 +00001545 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001546 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1547 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001548 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001549#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001550 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001551#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001552 );
1553 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001554
1555#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001556 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001557#if OMPT_TRACE
1558 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1559
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001560 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001561 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1562 ompt_parallel_id, ompt_task_id);
1563 }
1564
1565 __ompt_lw_taskteam_unlink(master_th);
1566 // clear the task id only after unlinking the task
1567 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1568#endif
1569
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001570 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001571 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001572 ompt_parallel_id, ompt_task_id,
1573 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001574 }
1575 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1576 }
1577#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001578 return TRUE;
1579 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001580
Jim Cownie5e8470a2013-09-27 10:38:44 +00001581 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001582#if OMPT_SUPPORT
1583 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1584#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001585 parent_team->t.t_invoke = invoker;
1586 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1587 parent_team->t.t_active_level ++;
1588 parent_team->t.t_level ++;
1589
1590 /* Change number of threads in the team if requested */
1591 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001592 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001593 // AC: can only reduce the number of threads dynamically, cannot increase it
1594 kmp_info_t **other_threads = parent_team->t.t_threads;
1595 parent_team->t.t_nproc = master_set_numthreads;
1596 for ( i = 0; i < master_set_numthreads; ++i ) {
1597 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1598 }
1599 // Keep extra threads hot in the team for possible next parallels
1600 }
1601 master_th->th.th_set_nproc = 0;
1602 }
1603
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001604#if USE_DEBUGGER
1605 if ( __kmp_debugging ) { // Let debugger override number of threads.
1606 int nth = __kmp_omp_num_threads( loc );
1607 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1608 master_set_numthreads = nth;
1609 }; // if
1610 }; // if
1611#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001612
1613 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1614 __kmp_internal_fork( loc, gtid, parent_team );
1615 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1616
1617 /* Invoke microtask for MASTER thread */
1618 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1619 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1620
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001621 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001622 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1623 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001624 if (! parent_team->t.t_invoke( gtid )) {
1625 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1626 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001627 }
1628 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1629 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1630 KMP_MB(); /* Flush all pending memory write invalidates. */
1631
1632 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1633
1634 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001635 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001636#endif /* OMP_40_ENABLED */
1637
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001638#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001640 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001641 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001642#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643
Jim Cownie5e8470a2013-09-27 10:38:44 +00001644 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
1645 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001646 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001647#if OMP_40_ENABLED
1648 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1649#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001650 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001651 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001652
1653 // Check whether we need to take the forkjoin lock (not needed for a serialized parallel outside of a teams construct).
1654 // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
1655 if (nthreads > 1) {
1656 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1657#if OMP_40_ENABLED
1658 && !enter_teams
1659#endif /* OMP_40_ENABLED */
1660 ) ) || ( __kmp_library == library_serial ) ) {
1661 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1662 gtid, nthreads ));
1663 nthreads = 1;
1664 }
1665 }
1666 if ( nthreads > 1 ) {
1667 /* determine how many new threads we can use */
1668 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1669
1670 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001671#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001672/* AC: If we execute teams from a parallel region (on the host), then the teams should be created,
1673 but each can have only 1 thread if nesting is disabled. If teams is called from a serial region,
1674 then the teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001675 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001676#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001677 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001678 if ( nthreads == 1 ) {
1679 // Free lock for single thread execution here;
1680 // for multi-thread execution it will be freed later
1681 // after team of threads created and initialized
1682 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1683 }
1684 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001685 }
1686 KMP_DEBUG_ASSERT( nthreads > 0 );
1687
1688 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001689 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001690
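    // At this point nthreads is final: a num_threads clause (if present) wins,
    // otherwise the parent team's nproc ICV is used, and the value may have been
    // clamped to 1 above (nesting disabled, serial library, or
    // __kmp_reserve_threads() found no free threads).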
Jim Cownie5e8470a2013-09-27 10:38:44 +00001691 /* create a serialized parallel region? */
1692 if ( nthreads == 1 ) {
1693 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001694#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 void * args[ argc ];
1696#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001697 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001698#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001699
Jim Cownie5e8470a2013-09-27 10:38:44 +00001700 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1701
1702 __kmpc_serialized_parallel(loc, gtid);
1703
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001704 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001705 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001706 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707#if OMP_40_ENABLED
1708 if ( !ap ) {
1709 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001710 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001712
1713#if OMPT_SUPPORT
1714 void *dummy;
1715 void **exit_runtime_p;
1716
1717 ompt_lw_taskteam_t lw_taskteam;
1718
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001719 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001720 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1721 unwrapped_task, ompt_parallel_id);
1722 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1723 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1724
1725 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1726
1727#if OMPT_TRACE
1728 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001729 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001730 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1731 ompt_parallel_id, my_task_id);
1732 }
1733#endif
1734
1735 /* OMPT state */
1736 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1737 } else {
1738 exit_runtime_p = &dummy;
1739 }
1740#endif
1741
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001742 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001743 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1744 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001745 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1746#if OMPT_SUPPORT
1747 , exit_runtime_p
1748#endif
1749 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001750 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001751
1752#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001753 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001754 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1755
1756#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001757 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001758 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1759 ompt_parallel_id, ompt_task_id);
1760 }
1761#endif
1762
1763 __ompt_lw_taskteam_unlink(master_th);
1764 // clear the task id only after unlinking the task
1765 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1766
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001767 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001768 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001769 ompt_parallel_id, ompt_task_id,
1770 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001771 }
1772 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1773 }
1774#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001775 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
1776 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1777 team = master_th->th.th_team;
1778 //team->t.t_pkfn = microtask;
1779 team->t.t_invoke = invoker;
1780 __kmp_alloc_argv_entries( argc, team, TRUE );
1781 team->t.t_argc = argc;
1782 argv = (void**) team->t.t_argv;
1783 if ( ap ) {
1784 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001785// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001786# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001787 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001788# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001789 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001790# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001791 } else {
1792 for( i=0; i < argc; ++i )
1793 // Get args from parent team for teams construct
1794 argv[i] = parent_team->t.t_argv[i];
1795 }
1796 // AC: revert change made in __kmpc_serialized_parallel()
1797 // because initial code in teams should have level=0
1798 team->t.t_level--;
1799 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001800 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001801 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1802 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001803 invoker(gtid);
1804 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001805 } else {
1806#endif /* OMP_40_ENABLED */
1807 argv = args;
1808 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001809// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001810#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001811 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001812#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001813 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001814#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001815 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001816
1817#if OMPT_SUPPORT
1818 void *dummy;
1819 void **exit_runtime_p;
1820
1821 ompt_lw_taskteam_t lw_taskteam;
1822
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001823 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001824 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1825 unwrapped_task, ompt_parallel_id);
1826 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1827 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1828
1829 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1830
1831#if OMPT_TRACE
1832 /* OMPT implicit task begin */
1833 my_task_id = lw_taskteam.ompt_task_info.task_id;
1834 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001835 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001836 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1837 my_parallel_id, my_task_id);
1838 }
1839#endif
1840
1841 /* OMPT state */
1842 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1843 } else {
1844 exit_runtime_p = &dummy;
1845 }
1846#endif
1847
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001848 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001849 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1850 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001851 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1852#if OMPT_SUPPORT
1853 , exit_runtime_p
1854#endif
1855 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001856 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001857
1858#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001859 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001860#if OMPT_TRACE
1861 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1862
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001863 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001864 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1865 my_parallel_id, my_task_id);
1866 }
1867#endif
1868
1869 __ompt_lw_taskteam_unlink(master_th);
1870 // clear the task id only after unlinking the task
1871 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1872
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001873 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001874 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001875 ompt_parallel_id, ompt_task_id,
1876 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001877 }
1878 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1879 }
1880#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001881#if OMP_40_ENABLED
1882 }
1883#endif /* OMP_40_ENABLED */
1884 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001885 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001886#if OMPT_SUPPORT
1887 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1888 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1889 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1890 unwrapped_task, ompt_parallel_id);
1891
1892 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1893 lwt->ompt_task_info.frame.exit_runtime_frame = 0;
1894 __ompt_lw_taskteam_link(lwt, master_th);
1895#endif
1896
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001897 // we were called from GNU native code
1898 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1899 return FALSE;
1900 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001901 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001902 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001903 }
1904
Jim Cownie5e8470a2013-09-27 10:38:44 +00001905
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001906 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907 KMP_MB();
1908 return FALSE;
1909 }
1910
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 // GEH: only modify the executing flag in the case when not serialized
1912 // serialized case is handled in kmpc_serialized_parallel
1913 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001914 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1915 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001916 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1917 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1918 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001919
1920#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001921 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922#endif /* OMP_40_ENABLED */
1923 {
1924 /* Increment our nested depth level */
1925 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1926 }
1927
Jim Cownie5e8470a2013-09-27 10:38:44 +00001928 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001929 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001930 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1931 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932 }
1933 else {
1934 nthreads_icv = 0; // don't update
1935 }
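    // Example (sketch): with OMP_NUM_THREADS=4,3,2 the settings parser fills
    // __kmp_nested_nth.nth[] = {4, 3, 2}. A parallel forked from level 0 then
    // installs nthreads_icv = nth[1] = 3 as the nproc ICV of the new team
    // (assuming it differs from the current value); levels beyond the list
    // leave nthreads_icv == 0, i.e. no update.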
1936
1937#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001939 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001940 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001941 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1942 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943 }
1944 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001945 if (proc_bind == proc_bind_default) {
1946 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1947 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001949 /* else: The proc_bind policy was specified explicitly on parallel clause. This
1950 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001951 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001952 if ((level+1 < __kmp_nested_proc_bind.used)
1953 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1954 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 }
1956 }
1957
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001959 master_th->th.th_set_proc_bind = proc_bind_default;
1960#endif /* OMP_40_ENABLED */
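    // Example (sketch): with OMP_PROC_BIND=spread,close the settings parser fills
    // __kmp_nested_proc_bind.bind_types[] = {proc_bind_spread, proc_bind_close}.
    // A region forked at level 0 is then bound "spread", and "close" becomes
    // proc-bind-var for the next nesting level; a proc_bind clause on the
    // directive overrides the binding used for this region only.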
1961
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001962 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001963#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001964 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001966 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001968 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 new_icvs.nproc = nthreads_icv;
1972 }
1973
1974#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001975 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001976 new_icvs.proc_bind = proc_bind_icv;
1977 }
1978#endif /* OMP_40_ENABLED */
1979
1980 /* allocate a new parallel team */
1981 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1982 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001983#if OMPT_SUPPORT
1984 ompt_parallel_id,
1985#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001986#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001987 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001989 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1990 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001991 /* allocate a new parallel team */
1992 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1993 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001994#if OMPT_SUPPORT
1995 ompt_parallel_id,
1996#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001998 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002000 &master_th->th.th_current_task->td_icvs, argc
2001 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002003 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002004
2005 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002006 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2007 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2008 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2009 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2010 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002011#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002012 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002013#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002014 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
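    // KMP_CHECK_UPDATE only stores when the value actually changes, which avoids
    // dirtying a shared cache line when a hot team is re-used with identical
    // settings. Roughly (see kmp.h for the authoritative definitions):
    //
    //     #define KMP_CHECK_UPDATE(a, b)      if ((a) != (b)) (a) = (b)
    //     #define KMP_CHECK_UPDATE_SYNC(a, b) if ((a) != (b)) TCW_SYNC_PTR((a), (b))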
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015 // TODO: parent_team->t.t_level == INT_MAX ???
2016#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002017 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002018#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002019 int new_level = parent_team->t.t_level + 1;
2020 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2021 new_level = parent_team->t.t_active_level + 1;
2022 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002023#if OMP_40_ENABLED
2024 } else {
2025 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002026 int new_level = parent_team->t.t_level;
2027 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2028 new_level = parent_team->t.t_active_level;
2029 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030 }
2031#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002032 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2033 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
2034 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002035
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002036#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002037 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002038#endif
2039
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002040 // Update the floating point rounding in the team if required.
2041 propagateFPControl(team);
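    // propagateFPControl() snapshots the master's floating-point control state
    // (x87 control word and MXCSR, when FP-control inheritance is enabled) into
    // the team so that worker threads can pick it up; the matching
    // updateHWFPControl() in __kmp_join_call() restores the saved state on the
    // way out if it changed.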
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042
2043 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002044 // Set master's task team to team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002045#if 0
2046 // Patch out an assertion that trips while the runtime seems to operate correctly.
2047 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002048 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002049#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002050 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002051 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002052 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002053
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002054 if ( active_level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002055 // Take a memo of master's task_state
2056 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2057 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002058 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2059 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002060 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002061 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002062 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2063 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2064 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002065 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2066 new_stack[i] = 0;
2067 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002068 old_stack = master_th->th.th_task_state_memo_stack;
2069 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002070 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002071 __kmp_free(old_stack);
2072 }
2073 // Store master's task_state on stack
2074 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2075 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002076#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002077 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
Jonathan Peyton54127982015-11-04 21:37:48 +00002078 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2079 }
2080 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002081#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002082 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002083#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002084 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002085#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002086 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002087#if !KMP_NESTED_HOT_TEAMS
2088 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2089#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002090 }
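    // The memo stack above lets the master remember th_task_state per nesting
    // level: pushed here on fork, popped again in __kmp_join_call(). Growth is
    // by doubling; e.g. assuming an initial th_task_state_stack_sz of 4 the
    // stack grows 4 -> 8 -> 16 ... whenever th_task_state_top reaches the
    // current size, with the new tail zero-initialized.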
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091
2092 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2093 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2094 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2095 ( team->t.t_master_tid == 0 &&
2096 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2097 KMP_MB();
2098
2099 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002100 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002101#if OMP_40_ENABLED
2102 if ( ap ) {
2103#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002104 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002105// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002106#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002107 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002108#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002109 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002110#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002111 KMP_CHECK_UPDATE(*argv, new_argv);
2112 argv++;
2113 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114#if OMP_40_ENABLED
2115 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002116 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002117 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002118 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2119 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120 }
2121#endif /* OMP_40_ENABLED */
2122
2123 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002124 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002125 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2126 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002127
2128 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002129 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002130
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002131#if OMPT_SUPPORT
2132 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2133#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134
2135 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2136
Jim Cownie5e8470a2013-09-27 10:38:44 +00002137#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002138 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002139# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002140 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002141# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002142 ) {
2143#if USE_ITT_NOTIFY
2144 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2145 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002146 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002147 kmp_uint64 tmp_time = 0;
2148 if ( __itt_get_timestamp_ptr )
2149 tmp_time = __itt_get_timestamp();
2150 // Internal fork - report frame begin
2151 master_th->th.th_frame_time = tmp_time;
2152 if ( __kmp_forkjoin_frames_mode == 3 )
2153 team->t.t_region_time = tmp_time;
2154 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2155#endif /* USE_ITT_NOTIFY */
2156 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2157 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2158 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002159 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2160 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002161 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002162#endif /* USE_ITT_BUILD */
2163
2164 /* now go on and do the work */
2165 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2166 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002167 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2168 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002169
2170#if USE_ITT_BUILD
2171 if ( __itt_stack_caller_create_ptr ) {
2172 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2173 }
2174#endif /* USE_ITT_BUILD */
2175
2176#if OMP_40_ENABLED
2177 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
2178#endif /* OMP_40_ENABLED */
2179 {
2180 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002181 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2182 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002183 }
2184
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002185 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002186 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2187 return TRUE;
2188 }
2189
2190 /* Invoke microtask for MASTER thread */
2191 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2192 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002193 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002194
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002195 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002196 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2197 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00002198 // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002199 if (! team->t.t_invoke( gtid )) {
2200 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2201 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002202 }
2203 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2204 gtid, team->t.t_id, team->t.t_pkfn ) );
2205 KMP_MB(); /* Flush all pending memory write invalidates. */
2206
2207 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2208
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002209#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002210 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002211 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2212 }
2213#endif
2214
Jim Cownie5e8470a2013-09-27 10:38:44 +00002215 return TRUE;
2216}
2217
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002218#if OMPT_SUPPORT
2219static inline void
2220__kmp_join_restore_state(
2221 kmp_info_t *thread,
2222 kmp_team_t *team)
2223{
2224 // restore state outside the region
2225 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2226 ompt_state_work_serial : ompt_state_work_parallel);
2227}
2228
2229static inline void
2230__kmp_join_ompt(
2231 kmp_info_t *thread,
2232 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002233 ompt_parallel_id_t parallel_id,
2234 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002235{
2236 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
2237 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2238 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002239 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002240 }
2241
2242 __kmp_join_restore_state(thread,team);
2243}
2244#endif
2245
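/*
 * __kmp_join_call() is the mirror of __kmp_fork_call(): it runs on the master
 * after the parallel region body, waits for the workers at the join barrier
 * (unless we are leaving a teams construct), reports OMPT/ITT events, and then
 * restores the master's view of the parent team and its saved task state.
 */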
Jim Cownie5e8470a2013-09-27 10:38:44 +00002246void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002247__kmp_join_call(ident_t *loc, int gtid
2248#if OMPT_SUPPORT
2249 , enum fork_context_e fork_context
2250#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002251#if OMP_40_ENABLED
2252 , int exit_teams
2253#endif /* OMP_40_ENABLED */
2254)
2255{
Jonathan Peyton45be4502015-08-11 21:36:41 +00002256 KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257 kmp_team_t *team;
2258 kmp_team_t *parent_team;
2259 kmp_info_t *master_th;
2260 kmp_root_t *root;
2261 int master_active;
2262 int i;
2263
2264 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2265
2266 /* setup current data */
2267 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002268 root = master_th->th.th_root;
2269 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002270 parent_team = team->t.t_parent;
2271
2272 master_th->th.th_ident = loc;
2273
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002274#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002275 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002276 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2277 }
2278#endif
2279
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002280#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00002281 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2282 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2283 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002284 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2285 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002286 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002287#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002288
2289 if( team->t.t_serialized ) {
2290#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002291 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292 // We are in teams construct
2293 int level = team->t.t_level;
2294 int tlevel = master_th->th.th_teams_level;
2295 if ( level == tlevel ) {
2296 // AC: we haven't incremented it earlier at start of teams construct,
2297 // so do it here - at the end of teams construct
2298 team->t.t_level++;
2299 } else if ( level == tlevel + 1 ) {
2300 // AC: we are exiting parallel inside teams, need to increment serialization
2301 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2302 team->t.t_serialized++;
2303 }
2304 }
2305#endif /* OMP_40_ENABLED */
2306 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002307
2308#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002309 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002310 __kmp_join_restore_state(master_th, parent_team);
2311 }
2312#endif
2313
Jim Cownie5e8470a2013-09-27 10:38:44 +00002314 return;
2315 }
2316
2317 master_active = team->t.t_master_active;
2318
2319#if OMP_40_ENABLED
2320 if (!exit_teams)
2321#endif /* OMP_40_ENABLED */
2322 {
2323 // AC: No barrier for internal teams at exit from teams construct.
2324 // But there is a barrier for the external team (league).
2325 __kmp_internal_join( loc, gtid, team );
2326 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002327#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002328 else {
2329 master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
2330 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002331#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002332
Jim Cownie5e8470a2013-09-27 10:38:44 +00002333 KMP_MB();
2334
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002335#if OMPT_SUPPORT
2336 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2337#endif
2338
Jim Cownie5e8470a2013-09-27 10:38:44 +00002339#if USE_ITT_BUILD
2340 if ( __itt_stack_caller_create_ptr ) {
2341 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2342 }
2343
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002344 // Mark end of "parallel" region for VTune.
2345 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002346# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002347 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002348# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002349 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002350 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002351 // only one notification scheme (either "submit" or "forking/joined", not both)
2352 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2353 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2354 0, loc, master_th->th.th_team_nproc, 1 );
2355 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2356 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2357 __kmp_itt_region_joined( gtid );
2358 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002359#endif /* USE_ITT_BUILD */
2360
2361#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002362 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002363 !exit_teams &&
2364 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2365 team->t.t_level == master_th->th.th_teams_level + 1 ) {
2366 // AC: We need to leave the team structure intact at the end
2367 // of a parallel inside the teams construct, so that the same (hot)
2368 // team works at the next parallel; only adjust the nesting levels here
2369
2370 /* Decrement our nested depth level */
2371 team->t.t_level --;
2372 team->t.t_active_level --;
2373 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2374
2375 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002376 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002378 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002379 kmp_info_t **other_threads = team->t.t_threads;
2380 team->t.t_nproc = new_num;
2381 for ( i = 0; i < old_num; ++i ) {
2382 other_threads[i]->th.th_team_nproc = new_num;
2383 }
2384 // Adjust states of non-used threads of the team
2385 for ( i = old_num; i < new_num; ++i ) {
2386 // Re-initialize thread's barrier data.
2387 int b;
2388 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002389 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002390 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002391 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002392#if USE_DEBUGGER
2393 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2394#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002395 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002396 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2397 // Synchronize thread's task state
2398 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2399 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002400 }
2401 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002402
2403#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002404 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002405 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002406 }
2407#endif
2408
Jim Cownie5e8470a2013-09-27 10:38:44 +00002409 return;
2410 }
2411#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002412
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002413 /* do cleanup and restore the parent team */
2414 master_th->th.th_info .ds.ds_tid = team->t.t_master_tid;
2415 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2416
2417 master_th->th.th_dispatch =
2418 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002419
2420 /* jc: The following lock has instructions with REL and ACQ semantics,
2421 separating the parallel user code called in this parallel region
2422 from the serial user code called after this function returns.
2423 */
2424 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2425
2426#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002427 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002428#endif /* OMP_40_ENABLED */
2429 {
2430 /* Decrement our nested depth level */
2431 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2432 }
2433 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2434
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002435#if OMPT_SUPPORT && OMPT_TRACE
2436 if (ompt_enabled) {
2437 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2438 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2439 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2440 parallel_id, task_info->task_id);
2441 }
2442 task_info->frame.exit_runtime_frame = 0;
2443 task_info->task_id = 0;
2444 }
2445#endif
2446
Jim Cownie5e8470a2013-09-27 10:38:44 +00002447 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2448 0, master_th, team ) );
2449 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002450
Alp Toker98758b02014-03-02 04:12:06 +00002451#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002452 //
2453 // Restore master thread's partition.
2454 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002455 master_th->th.th_first_place = team->t.t_first_place;
2456 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002457#endif /* OMP_40_ENABLED */
2458
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002459 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002460
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002461 if ( root->r.r_active != master_active )
2462 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002463
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002464 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002465
2466 /* this race was fun to find. make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002467 * region otherwise assertions may fail occasionally since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002468 * may be reallocated and the hierarchy appears inconsistent. it is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002469 * actually safe to run and won't cause any bugs, but will cause those
Jim Cownie5e8470a2013-09-27 10:38:44 +00002470 * assertion failures. it's only one deref&assign so might as well put this
2471 * in the critical region */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002472 master_th->th.th_team = parent_team;
2473 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2474 master_th->th.th_team_master = parent_team->t.t_threads[0];
2475 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002476
2477 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002478 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002479 parent_team != master_th->th.th_serial_team &&
2480 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002481 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2482 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002483 }
2484
Jim Cownie5e8470a2013-09-27 10:38:44 +00002485 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002486 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2487 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2488 // Remember master's state if we re-use this nested hot team
2489 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002490 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002491 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002492 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002493 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002494 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002495 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002496 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002497 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002498 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002499
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002500 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2501 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2502 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002503
2504 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2505
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002506#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002507 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002508 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002509 }
2510#endif
2511
Jim Cownie5e8470a2013-09-27 10:38:44 +00002512 KMP_MB();
2513 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2514}
2515
2516/* ------------------------------------------------------------------------ */
2517/* ------------------------------------------------------------------------ */
2518
2519/* Check whether we should push an internal control record onto the
2520 serial team stack. If so, do it. */
2521void
2522__kmp_save_internal_controls ( kmp_info_t * thread )
2523{
2524
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002525 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002526 return;
2527 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002528 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002529 int push = 0;
2530
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002531 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002532 push = 1;
2533 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002534 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2535 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002536 push = 1;
2537 }
2538 }
2539 if (push) { /* push a record on the serial team's stack */
2540 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2541
Jim Cownie5e8470a2013-09-27 10:38:44 +00002542 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002543
2544 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2545
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002546 control->next = thread->th.th_team->t.t_control_stack_top;
2547 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002548 }
2549 }
2550}
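
/*
 * Editor's illustration (a minimal sketch, not part of the runtime): the
 * record above is only pushed when the caller is running on its serial team
 * with t_serialized > 1, i.e. inside a nested parallel region that the
 * runtime has serialized. Assuming omp_set_num_threads() reaches
 * __kmp_set_num_threads() below (and hence this routine), and assuming both
 * regions here end up serialized by the runtime:
 *
 *     #include <omp.h>
 *     int main(void) {
 *         #pragma omp parallel num_threads(1)      // serialized: t_serialized == 1
 *         {
 *             #pragma omp parallel num_threads(1)  // serialized: t_serialized == 2
 *             {
 *                 omp_set_num_threads(4);          // a control record is pushed here;
 *             }                                    // it is later used to restore the
 *         }                                        // ICVs when this level unwinds
 *         return 0;
 *     }
 */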
2551
2552/* Changes set_nproc */
2553void
2554__kmp_set_num_threads( int new_nth, int gtid )
2555{
2556 kmp_info_t *thread;
2557 kmp_root_t *root;
2558
2559 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2560 KMP_DEBUG_ASSERT( __kmp_init_serial );
2561
2562 if (new_nth < 1)
2563 new_nth = 1;
2564 else if (new_nth > __kmp_max_nth)
2565 new_nth = __kmp_max_nth;
2566
Jonathan Peyton45be4502015-08-11 21:36:41 +00002567 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002568 thread = __kmp_threads[gtid];
2569
2570 __kmp_save_internal_controls( thread );
2571
2572 set__nproc( thread, new_nth );
2573
2574 //
2575 // If this omp_set_num_threads() call will cause the hot team size to be
2576 // reduced (in the absence of a num_threads clause), then reduce it now,
2577 // rather than waiting for the next parallel region.
2578 //
2579 root = thread->th.th_root;
2580 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002581 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2582#if KMP_NESTED_HOT_TEAMS
2583 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2584#endif
2585 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002586 kmp_team_t *hot_team = root->r.r_hot_team;
2587 int f;
2588
2589 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2590
Jim Cownie5e8470a2013-09-27 10:38:44 +00002591 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2593 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002594 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2595 // When decreasing team size, threads no longer in the team should unref task team.
2596 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2597 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002598 __kmp_free_thread( hot_team->t.t_threads[f] );
2599 hot_team->t.t_threads[f] = NULL;
2600 }
2601 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002602#if KMP_NESTED_HOT_TEAMS
2603 if( thread->th.th_hot_teams ) {
2604 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2605 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2606 }
2607#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002608
Jim Cownie5e8470a2013-09-27 10:38:44 +00002609 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2610
2611 //
2612 // Update the t_nproc field in the threads that are still active.
2613 //
2614 for( f=0 ; f < new_nth; f++ ) {
2615 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2616 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2617 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618        // Special flag in case of an omp_set_num_threads() call
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002619 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002620 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621}
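
/*
 * Editor's illustration (sketch only; assumes the standard entry point
 * omp_set_num_threads() routes to __kmp_set_num_threads() above): reducing
 * the requested thread count outside of a parallel region releases the
 * now-surplus hot-team workers immediately, rather than at the next fork.
 *
 *     #include <omp.h>
 *     int main(void) {
 *         omp_set_num_threads(8);
 *         #pragma omp parallel
 *         { }                      // hot team grows to 8 threads
 *         omp_set_num_threads(2);  // threads 2..7 of the hot team are freed
 *                                  // here, since the root is not active
 *         return 0;
 *     }
 */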
2622
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623/* Changes max_active_levels */
2624void
2625__kmp_set_max_active_levels( int gtid, int max_active_levels )
2626{
2627 kmp_info_t *thread;
2628
2629 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2630 KMP_DEBUG_ASSERT( __kmp_init_serial );
2631
2632 // validate max_active_levels
2633 if( max_active_levels < 0 ) {
2634 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2635 // We ignore this call if the user has specified a negative value.
2636 // The current setting won't be changed. The last valid setting will be used.
2637 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2638 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2639 return;
2640 }
2641 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2642 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2643 // We allow a zero value. (implementation defined behavior)
2644 } else {
2645 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2646 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2647 // Current upper limit is MAX_INT. (implementation defined behavior)
2648 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
2649        // Actually, the flow should never get here while the upper limit is MAX_INT.
2650 }
2651 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2652
2653 thread = __kmp_threads[ gtid ];
2654
2655 __kmp_save_internal_controls( thread );
2656
2657 set__max_active_levels( thread, max_active_levels );
2658
2659}
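
/*
 * Editor's illustration (sketch only; assumes omp_set_max_active_levels()
 * reaches the routine above): the validation accepts any value in
 * [ 0, KMP_MAX_ACTIVE_LEVELS_LIMIT ], ignores negative input with a warning,
 * and clamps anything above the limit.
 *
 *     omp_set_max_active_levels(2);    // accepted; the ICV becomes 2
 *     omp_set_max_active_levels(-3);   // warning; previous value (2) is kept
 *     // a value above KMP_MAX_ACTIVE_LEVELS_LIMIT would be clamped to the
 *     // limit, with a warning
 */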
2660
2661/* Gets max_active_levels */
2662int
2663__kmp_get_max_active_levels( int gtid )
2664{
2665 kmp_info_t *thread;
2666
2667 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2668 KMP_DEBUG_ASSERT( __kmp_init_serial );
2669
2670 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002671 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002672 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002673 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2674 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675}
2676
2677/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2678void
2679__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2680{
2681 kmp_info_t *thread;
2682// kmp_team_t *team;
2683
2684 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2685 KMP_DEBUG_ASSERT( __kmp_init_serial );
2686
2687 // Check if the kind parameter is valid, correct if needed.
2688 // Valid parameters should fit in one of two intervals - standard or extended:
2689 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2690 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2691 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2692 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2693 {
2694 // TODO: Hint needs attention in case we change the default schedule.
2695 __kmp_msg(
2696 kmp_ms_warning,
2697 KMP_MSG( ScheduleKindOutOfRange, kind ),
2698 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2699 __kmp_msg_null
2700 );
2701 kind = kmp_sched_default;
2702 chunk = 0; // ignore chunk value in case of bad kind
2703 }
2704
2705 thread = __kmp_threads[ gtid ];
2706
2707 __kmp_save_internal_controls( thread );
2708
2709 if ( kind < kmp_sched_upper_std ) {
2710 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2711            // differentiate static chunked vs. unchunked:
2712 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002713 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002714 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002715 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002716 }
2717 } else {
2718 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002719 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2721 }
2722 if ( kind == kmp_sched_auto ) {
2723 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002724 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002725 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002726 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002727 }
2728}
2729
2730/* Gets def_sched_var ICV values */
2731void
2732__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2733{
2734 kmp_info_t *thread;
2735 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002736
2737 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2738 KMP_DEBUG_ASSERT( __kmp_init_serial );
2739
2740 thread = __kmp_threads[ gtid ];
2741
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002742 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002743
2744 switch ( th_type ) {
2745 case kmp_sch_static:
2746 case kmp_sch_static_greedy:
2747 case kmp_sch_static_balanced:
2748 *kind = kmp_sched_static;
2749 *chunk = 0; // chunk was not set, try to show this fact via zero value
2750 return;
2751 case kmp_sch_static_chunked:
2752 *kind = kmp_sched_static;
2753 break;
2754 case kmp_sch_dynamic_chunked:
2755 *kind = kmp_sched_dynamic;
2756 break;
2757 case kmp_sch_guided_chunked:
2758 case kmp_sch_guided_iterative_chunked:
2759 case kmp_sch_guided_analytical_chunked:
2760 *kind = kmp_sched_guided;
2761 break;
2762 case kmp_sch_auto:
2763 *kind = kmp_sched_auto;
2764 break;
2765 case kmp_sch_trapezoidal:
2766 *kind = kmp_sched_trapezoidal;
2767 break;
2768/*
2769 case kmp_sch_static_steal:
2770 *kind = kmp_sched_static_steal;
2771 break;
2772*/
2773 default:
2774 KMP_FATAL( UnknownSchedulingType, th_type );
2775 }
2776
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002777 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002778}
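
/*
 * Editor's illustration (sketch only; assumes omp_set_schedule() and
 * omp_get_schedule() map to the two routines above): the internal schedule
 * kinds collapse back to the standard values on the way out, and a chunk of
 * zero indicates an unchunked (default) static schedule.
 *
 *     omp_sched_t kind;
 *     int chunk;
 *     omp_set_schedule(omp_sched_static, 0);   // unchunked static
 *     omp_get_schedule(&kind, &chunk);         // kind == omp_sched_static, chunk == 0
 *     omp_set_schedule(omp_sched_dynamic, 4);
 *     omp_get_schedule(&kind, &chunk);         // kind == omp_sched_dynamic, chunk == 4
 */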
2779
2780int
2781__kmp_get_ancestor_thread_num( int gtid, int level ) {
2782
2783 int ii, dd;
2784 kmp_team_t *team;
2785 kmp_info_t *thr;
2786
2787 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2788 KMP_DEBUG_ASSERT( __kmp_init_serial );
2789
2790 // validate level
2791 if( level == 0 ) return 0;
2792 if( level < 0 ) return -1;
2793 thr = __kmp_threads[ gtid ];
2794 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002795 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002796 if( level > ii ) return -1;
2797
2798#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002799 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002800 // AC: we are in teams region where multiple nested teams have same level
2801 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2802 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2803 KMP_DEBUG_ASSERT( ii >= tlevel );
2804            // AC: As we need to pass through the teams league, we artificially increase ii
2805 if ( ii == tlevel ) {
2806 ii += 2; // three teams have same level
2807 } else {
2808 ii ++; // two teams have same level
2809 }
2810 }
2811 }
2812#endif
2813
2814 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2815
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002816 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002817 level++;
2818 while( ii > level )
2819 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002820 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002821 {
2822 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002823 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002824 team = team->t.t_parent;
2825 continue;
2826 }
2827 if( ii > level ) {
2828 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002829 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002830 ii--;
2831 }
2832 }
2833
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002834 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002835}
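
/*
 * Editor's illustration (sketch only; assumes omp_get_ancestor_thread_num()
 * maps to the routine above and that nested parallelism is enabled so both
 * levels are active): the level argument counts outward from level 0, the
 * initial thread, and out-of-range levels return -1.
 *
 *     #pragma omp parallel num_threads(2)
 *     #pragma omp parallel num_threads(2)
 *     {
 *         int a0 = omp_get_ancestor_thread_num(0);  // always 0
 *         int a1 = omp_get_ancestor_thread_num(1);  // tid within the outer team
 *         int a2 = omp_get_ancestor_thread_num(2);  // == omp_get_thread_num()
 *         int a3 = omp_get_ancestor_thread_num(3);  // level > current level -> -1
 *     }
 */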
2836
2837int
2838__kmp_get_team_size( int gtid, int level ) {
2839
2840 int ii, dd;
2841 kmp_team_t *team;
2842 kmp_info_t *thr;
2843
2844 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2845 KMP_DEBUG_ASSERT( __kmp_init_serial );
2846
2847 // validate level
2848 if( level == 0 ) return 1;
2849 if( level < 0 ) return -1;
2850 thr = __kmp_threads[ gtid ];
2851 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002852 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002853 if( level > ii ) return -1;
2854
2855#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002856 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002857 // AC: we are in teams region where multiple nested teams have same level
2858 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2859 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2860 KMP_DEBUG_ASSERT( ii >= tlevel );
2861            // AC: As we need to pass through the teams league, we artificially increase ii
2862 if ( ii == tlevel ) {
2863 ii += 2; // three teams have same level
2864 } else {
2865 ii ++; // two teams have same level
2866 }
2867 }
2868 }
2869#endif
2870
2871 while( ii > level )
2872 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002873 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002874 {
2875 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002876 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002877 team = team->t.t_parent;
2878 continue;
2879 }
2880 if( ii > level ) {
2881 team = team->t.t_parent;
2882 ii--;
2883 }
2884 }
2885
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002886 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002887}
2888
Jim Cownie5e8470a2013-09-27 10:38:44 +00002889kmp_r_sched_t
2890__kmp_get_schedule_global() {
2891// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
2892// may be changed by kmp_set_defaults independently. So one can get the updated schedule here.
2893
2894 kmp_r_sched_t r_sched;
2895
2896 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2897 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2898 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2899 if ( __kmp_sched == kmp_sch_static ) {
2900 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2901 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2902 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2903 } else {
2904 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2905 }
2906
2907 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2908 r_sched.chunk = KMP_DEFAULT_CHUNK;
2909 } else {
2910 r_sched.chunk = __kmp_chunk;
2911 }
2912
2913 return r_sched;
2914}
2915
2916/* ------------------------------------------------------------------------ */
2917/* ------------------------------------------------------------------------ */
2918
2919
2920/*
2921 * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
2922 * at least argc number of *t_argv entries for the requested team.
2923 */
2924static void
2925__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2926{
2927
2928 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002929 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002930
2931 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2932 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002933 /* if previously allocated heap space for args, free them */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002934 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2935 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002936
2937 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2938 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002939 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002940 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2941 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002942 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002943 if ( __kmp_storage_map ) {
2944 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2945 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2946 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2947 "team_%d.t_inline_argv",
2948 team->t.t_id );
2949 }
2950 } else {
2951 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002952 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002953 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2954 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2955 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002956 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002957 if ( __kmp_storage_map ) {
2958 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2959 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2960 team->t.t_id );
2961 }
2962 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002963 }
2964}
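
/*
 * Editor's note (illustration only): the routine above keeps small argument
 * lists in the team structure's inline array and, for the heap case, sizes
 * the allocation with the rule restated below (same expression as above).
 *
 *     // mirrors the heap-case computation in __kmp_alloc_argv_entries()
 *     static int argv_heap_capacity(int argc) {
 *         return (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
 *                    ? KMP_MIN_MALLOC_ARGV_ENTRIES
 *                    : 2 * argc;
 *     }
 */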
2965
2966static void
2967__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2968{
2969 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002970 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002971 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2972 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002973 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002974 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002975 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002976 team->t.t_max_nproc = max_nth;
2977
2978 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002979 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002980 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002981#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00002982 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2983#endif
2984 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002985}
2986
2987static void
2988__kmp_free_team_arrays(kmp_team_t *team) {
2989 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2990 int i;
2991 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2992 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2993 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2994 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2995 }; // if
2996 }; // for
2997 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00002998 __kmp_free(team->t.t_disp_buffer);
2999 __kmp_free(team->t.t_dispatch);
3000 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003001 team->t.t_threads = NULL;
3002 team->t.t_disp_buffer = NULL;
3003 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003004 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003005}
3006
3007static void
3008__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3009 kmp_info_t **oldThreads = team->t.t_threads;
3010
Jonathan Peytona58563d2016-03-29 20:05:27 +00003011 __kmp_free(team->t.t_disp_buffer);
3012 __kmp_free(team->t.t_dispatch);
3013 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003014 __kmp_allocate_team_arrays(team, max_nth);
3015
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003016 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003017
3018 __kmp_free(oldThreads);
3019}
3020
3021static kmp_internal_control_t
3022__kmp_get_global_icvs( void ) {
3023
Jim Cownie5e8470a2013-09-27 10:38:44 +00003024 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003025
3026#if OMP_40_ENABLED
3027 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3028#endif /* OMP_40_ENABLED */
3029
3030 kmp_internal_control_t g_icvs = {
3031 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003032 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3033 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3034 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003035 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
3036 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003037 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3038 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003039 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3040 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003041#if OMP_40_ENABLED
3042 __kmp_nested_proc_bind.bind_types[0],
3043#endif /* OMP_40_ENABLED */
3044 NULL //struct kmp_internal_control *next;
3045 };
3046
3047 return g_icvs;
3048}
3049
3050static kmp_internal_control_t
3051__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3052
Jim Cownie5e8470a2013-09-27 10:38:44 +00003053 kmp_internal_control_t gx_icvs;
3054    gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in __kmp_save_internal_controls
3055 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3056 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003057
3058 return gx_icvs;
3059}
3060
3061static void
3062__kmp_initialize_root( kmp_root_t *root )
3063{
3064 int f;
3065 kmp_team_t *root_team;
3066 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003067 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003068 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3069 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003070 KMP_DEBUG_ASSERT( root );
3071 KMP_ASSERT( ! root->r.r_begin );
3072
3073 /* setup the root state structure */
3074 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003075 root->r.r_begin = FALSE;
3076 root->r.r_active = FALSE;
3077 root->r.r_in_parallel = 0;
3078 root->r.r_blocktime = __kmp_dflt_blocktime;
3079 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003080
3081 /* setup the root team for this task */
3082 /* allocate the root team structure */
3083 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003084
Jim Cownie5e8470a2013-09-27 10:38:44 +00003085 root_team =
3086 __kmp_allocate_team(
3087 root,
3088 1, // new_nproc
3089 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003090#if OMPT_SUPPORT
3091 0, // root parallel id
3092#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003093#if OMP_40_ENABLED
3094 __kmp_nested_proc_bind.bind_types[0],
3095#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003096 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003097 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003098 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003099 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003100#if USE_DEBUGGER
3101 // Non-NULL value should be assigned to make the debugger display the root team.
3102 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3103#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003104
3105 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3106
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003107 root->r.r_root_team = root_team;
3108 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003109
3110 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003111 root_team->t.t_threads[0] = NULL;
3112 root_team->t.t_nproc = 1;
3113 root_team->t.t_serialized = 1;
3114 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3115 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3116 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003117 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3118 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3119
3120 /* setup the hot team for this task */
3121 /* allocate the hot team structure */
3122 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003123
Jim Cownie5e8470a2013-09-27 10:38:44 +00003124 hot_team =
3125 __kmp_allocate_team(
3126 root,
3127 1, // new_nproc
3128 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003129#if OMPT_SUPPORT
3130 0, // root parallel id
3131#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003132#if OMP_40_ENABLED
3133 __kmp_nested_proc_bind.bind_types[0],
3134#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003135 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003136 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003137 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003138 );
3139 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3140
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003141 root->r.r_hot_team = hot_team;
3142 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143
3144 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003145 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003146
3147 /* initialize hot team */
3148 hot_team_max_nth = hot_team->t.t_max_nproc;
3149 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003150 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003151 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003152 hot_team->t.t_nproc = 1;
3153 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3154 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3155 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003156 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003157}
3158
3159#ifdef KMP_DEBUG
3160
3161
3162typedef struct kmp_team_list_item {
3163 kmp_team_p const * entry;
3164 struct kmp_team_list_item * next;
3165} kmp_team_list_item_t;
3166typedef kmp_team_list_item_t * kmp_team_list_t;
3167
3168
3169static void
3170__kmp_print_structure_team_accum( // Add team to list of teams.
3171 kmp_team_list_t list, // List of teams.
3172 kmp_team_p const * team // Team to add.
3173) {
3174
3175 // List must terminate with item where both entry and next are NULL.
3176 // Team is added to the list only once.
3177 // List is sorted in ascending order by team id.
3178 // Team id is *not* a key.
3179
3180 kmp_team_list_t l;
3181
3182 KMP_DEBUG_ASSERT( list != NULL );
3183 if ( team == NULL ) {
3184 return;
3185 }; // if
3186
3187 __kmp_print_structure_team_accum( list, team->t.t_parent );
3188 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3189
3190 // Search list for the team.
3191 l = list;
3192 while ( l->next != NULL && l->entry != team ) {
3193 l = l->next;
3194 }; // while
3195 if ( l->next != NULL ) {
3196 return; // Team has been added before, exit.
3197 }; // if
3198
3199 // Team is not found. Search list again for insertion point.
3200 l = list;
3201 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3202 l = l->next;
3203 }; // while
3204
3205 // Insert team.
3206 {
3207 kmp_team_list_item_t * item =
3208 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3209 * item = * l;
3210 l->entry = team;
3211 l->next = item;
3212 }
3213
3214}
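
/*
 * Editor's note (illustration only): the insertion above uses the usual
 * "copy the current node into a fresh one, then overwrite the current node"
 * trick to insert before *l in a singly linked list terminated by a sentinel
 * (entry == NULL, next == NULL). A minimal restatement of the same steps:
 *
 *     // insert 'team' before node 'l' in a sentinel-terminated list
 *     kmp_team_list_item_t * item =
 *         (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
 *     *item = *l;        // new node takes over l's old contents
 *     l->entry = team;   // l now holds the new entry
 *     l->next  = item;   // and points at the displaced node
 */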
3215
3216static void
3217__kmp_print_structure_team(
3218 char const * title,
3219 kmp_team_p const * team
3220
3221) {
3222 __kmp_printf( "%s", title );
3223 if ( team != NULL ) {
3224 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3225 } else {
3226 __kmp_printf( " - (nil)\n" );
3227 }; // if
3228}
3229
3230static void
3231__kmp_print_structure_thread(
3232 char const * title,
3233 kmp_info_p const * thread
3234
3235) {
3236 __kmp_printf( "%s", title );
3237 if ( thread != NULL ) {
3238 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3239 } else {
3240 __kmp_printf( " - (nil)\n" );
3241 }; // if
3242}
3243
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003244void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003245__kmp_print_structure(
3246 void
3247) {
3248
3249 kmp_team_list_t list;
3250
3251 // Initialize list of teams.
3252 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3253 list->entry = NULL;
3254 list->next = NULL;
3255
3256 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3257 {
3258 int gtid;
3259 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3260 __kmp_printf( "%2d", gtid );
3261 if ( __kmp_threads != NULL ) {
3262 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3263 }; // if
3264 if ( __kmp_root != NULL ) {
3265 __kmp_printf( " %p", __kmp_root[ gtid ] );
3266 }; // if
3267 __kmp_printf( "\n" );
3268 }; // for gtid
3269 }
3270
3271 // Print out __kmp_threads array.
3272 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3273 if ( __kmp_threads != NULL ) {
3274 int gtid;
3275 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3276 kmp_info_t const * thread = __kmp_threads[ gtid ];
3277 if ( thread != NULL ) {
3278 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3279 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3280 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3281 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3282 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3283 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3284 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3285 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3286#if OMP_40_ENABLED
3287 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3288#endif
3289 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3290 __kmp_printf( "\n" );
3291 __kmp_print_structure_team_accum( list, thread->th.th_team );
3292 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3293 }; // if
3294 }; // for gtid
3295 } else {
3296 __kmp_printf( "Threads array is not allocated.\n" );
3297 }; // if
3298
3299 // Print out __kmp_root array.
3300 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3301 if ( __kmp_root != NULL ) {
3302 int gtid;
3303 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3304 kmp_root_t const * root = __kmp_root[ gtid ];
3305 if ( root != NULL ) {
3306 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3307 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3308 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3309 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3310 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3311 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3312 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3313 __kmp_printf( "\n" );
3314 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3315 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3316 }; // if
3317 }; // for gtid
3318 } else {
3319 __kmp_printf( "Ubers array is not allocated.\n" );
3320 }; // if
3321
3322 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3323 while ( list->next != NULL ) {
3324 kmp_team_p const * team = list->entry;
3325 int i;
3326 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3327 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3328 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3329 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3330 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3331 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3332 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3333 __kmp_printf( " Thread %2d: ", i );
3334 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3335 }; // for i
3336 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3337 __kmp_printf( "\n" );
3338 list = list->next;
3339 }; // while
3340
3341 // Print out __kmp_thread_pool and __kmp_team_pool.
3342 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3343 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3344 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3345 __kmp_printf( "\n" );
3346
3347 // Free team list.
3348 while ( list != NULL ) {
3349 kmp_team_list_item_t * item = list;
3350 list = list->next;
3351 KMP_INTERNAL_FREE( item );
3352 }; // while
3353
3354}
3355
3356#endif
3357
3358
3359//---------------------------------------------------------------------------
3360// Stuff for per-thread fast random number generator
3361// Table of primes
3362
3363static const unsigned __kmp_primes[] = {
3364 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3365 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3366 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3367 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3368 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3369 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3370 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3371 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3372 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3373 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3374 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3375 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3376 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3377 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3378 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3379 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3380};
3381
3382//---------------------------------------------------------------------------
3383// __kmp_get_random: Get a random number using a linear congruential method.
3384
3385unsigned short
3386__kmp_get_random( kmp_info_t * thread )
3387{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003388 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003389 unsigned short r = x>>16;
3390
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003391 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003392
3393 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3394 thread->th.th_info.ds.ds_tid, r) );
3395
3396 return r;
3397}
3398//--------------------------------------------------------
3399// __kmp_init_random: Initialize a random number generator
3400
3401void
3402__kmp_init_random( kmp_info_t * thread )
3403{
3404 unsigned seed = thread->th.th_info.ds.ds_tid;
3405
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003406 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3407 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3408 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003409}
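
/*
 * Editor's note (illustration only): the two routines above implement a
 * per-thread linear congruential generator, x_{n+1} = a * x_n + 1 on 32-bit
 * unsigned arithmetic (i.e. mod 2^32), where the multiplier 'a' is a
 * per-thread prime taken from __kmp_primes and each call returns the high
 * 16 bits of the state. A self-contained sketch of the same recurrence:
 *
 *     static unsigned short lcg_next(unsigned *state, unsigned a) {
 *         unsigned short r = (unsigned short)(*state >> 16); // high 16 bits
 *         *state = *state * a + 1;                           // advance state
 *         return r;
 *     }
 */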
3410
3411
3412#if KMP_OS_WINDOWS
3413/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3414static int
3415__kmp_reclaim_dead_roots(void) {
3416 int i, r = 0;
3417
3418 for(i = 0; i < __kmp_threads_capacity; ++i) {
3419 if( KMP_UBER_GTID( i ) &&
3420 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3421 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
3422 r += __kmp_unregister_root_other_thread(i);
3423 }
3424 }
3425 return r;
3426}
3427#endif
3428
3429/*
3430 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3431 free entries generated.
3432
3433 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3434 already dead.
3435
3436    On all platforms, expansion is attempted on the arrays __kmp_threads and __kmp_root, with appropriate
3437 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3438 __kmp_tp_capacity, if threadprivate cache array has been created.
3439 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3440
3441 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3442 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3443 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3444 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3445 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3446 as many free slots as possible up to nWish.
3447
3448 If any argument is negative, the behavior is undefined.
3449*/
3450static int
3451__kmp_expand_threads(int nWish, int nNeed) {
3452 int added = 0;
3453 int old_tp_cached;
3454 int __kmp_actual_max_nth;
3455
3456 if(nNeed > nWish) /* normalize the arguments */
3457 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003458#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003459/* only for Windows static library */
3460 /* reclaim array entries for root threads that are already dead */
3461 added = __kmp_reclaim_dead_roots();
3462
3463 if(nNeed) {
3464 nNeed -= added;
3465 if(nNeed < 0)
3466 nNeed = 0;
3467 }
3468 if(nWish) {
3469 nWish -= added;
3470 if(nWish < 0)
3471 nWish = 0;
3472 }
3473#endif
3474 if(nWish <= 0)
3475 return added;
3476
3477 while(1) {
3478 int nTarget;
3479 int minimumRequiredCapacity;
3480 int newCapacity;
3481 kmp_info_t **newThreads;
3482 kmp_root_t **newRoot;
3483
3484 //
3485 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3486 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3487 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3488 // become > __kmp_max_nth in one of two ways:
3489 //
3490 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3491 // may not be resused by another thread, so we may need to increase
3492 // __kmp_threads_capacity to __kmp_max_threads + 1.
3493 //
3494 // 2) New foreign root(s) are encountered. We always register new
3495 // foreign roots. This may cause a smaller # of threads to be
3496 // allocated at subsequent parallel regions, but the worker threads
3497 // hang around (and eventually go to sleep) and need slots in the
3498 // __kmp_threads[] array.
3499 //
3500 // Anyway, that is the reason for moving the check to see if
3501 // __kmp_max_threads was exceeded into __kmp_reseerve_threads()
3502 // instead of having it performed here. -BB
3503 //
3504 old_tp_cached = __kmp_tp_cached;
3505 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3506 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3507
3508 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3509 nTarget = nWish;
3510 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3511 /* can't fulfil nWish, so try nNeed */
3512 if(nNeed) {
3513 nTarget = nNeed;
3514 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3515 /* possible expansion too small -- give up */
3516 break;
3517 }
3518 } else {
3519 /* best-effort */
3520 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3521 if(!nTarget) {
3522                /* can't expand at all -- give up */
3523 break;
3524 }
3525 }
3526 }
3527 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3528
3529 newCapacity = __kmp_threads_capacity;
3530 do{
3531 newCapacity =
3532 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3533 (newCapacity << 1) :
3534 __kmp_actual_max_nth;
3535 } while(newCapacity < minimumRequiredCapacity);
3536 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3537 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003538 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3539 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003540 memset(newThreads + __kmp_threads_capacity, 0,
3541 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3542 memset(newRoot + __kmp_threads_capacity, 0,
3543 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3544
3545 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3546 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3547 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3548 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3549 of a double-check pair.
3550 */
3551 __kmp_free(newThreads);
3552 continue; /* start over and try again */
3553 }
3554 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3555 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3556 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3557 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3558 __kmp_free(newThreads);
3559 continue; /* start over and try again */
3560 } else {
3561 /* success */
3562 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated.
3563 //
3564 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3565 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3566 added += newCapacity - __kmp_threads_capacity;
3567 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3568 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003569 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003570 }
3571 }
3572 return added;
3573}
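
/*
 * Editor's note (illustration only): the capacity growth above doubles the
 * array size until the required capacity is reached, clipping at the actual
 * maximum (__kmp_tp_capacity if a threadprivate cache already exists,
 * otherwise __kmp_sys_max_nth). A standalone restatement of the doubling loop:
 *
 *     static int grow_capacity(int current, int required, int actual_max_nth) {
 *         int cap = current;
 *         do {
 *             cap = (cap <= (actual_max_nth >> 1)) ? (cap << 1) : actual_max_nth;
 *         } while (cap < required);
 *         return cap;
 *     }
 */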
3574
3575/* register the current thread as a root thread and obtain our gtid */
3576/* we must have the __kmp_initz_lock held at this point */
3577/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
3578int
3579__kmp_register_root( int initial_thread )
3580{
3581 kmp_info_t *root_thread;
3582 kmp_root_t *root;
3583 int gtid;
3584 int capacity;
3585 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3586 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3587 KMP_MB();
3588
3589
3590 /*
3591 2007-03-02:
3592
3593 If initial thread did not invoke OpenMP RTL yet, and this thread is not an initial one,
3594 "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
3595 return false (that means there is at least one empty slot in __kmp_threads array), but it
3596 is possible the only free slot is #0, which is reserved for initial thread and so cannot be
3597 used for this one. Following code workarounds this bug.
3598
3599 However, right solution seems to be not reserving slot #0 for initial thread because:
3600 (1) there is no magic in slot #0,
3601 (2) we cannot detect initial thread reliably (the first thread which does serial
3602 initialization may be not a real initial thread).
3603 */
3604 capacity = __kmp_threads_capacity;
3605 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3606 -- capacity;
3607 }; // if
3608
3609 /* see if there are too many threads */
3610 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3611 if ( __kmp_tp_cached ) {
3612 __kmp_msg(
3613 kmp_ms_fatal,
3614 KMP_MSG( CantRegisterNewThread ),
3615 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3616 KMP_HNT( PossibleSystemLimitOnThreads ),
3617 __kmp_msg_null
3618 );
3619 }
3620 else {
3621 __kmp_msg(
3622 kmp_ms_fatal,
3623 KMP_MSG( CantRegisterNewThread ),
3624 KMP_HNT( SystemLimitOnThreads ),
3625 __kmp_msg_null
3626 );
3627 }
3628 }; // if
3629
3630 /* find an available thread slot */
3631 /* Don't reassign the zero slot since we need that to only be used by initial
3632 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003633 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3634 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003635 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3636 KMP_ASSERT( gtid < __kmp_threads_capacity );
3637
3638 /* update global accounting */
3639 __kmp_all_nth ++;
3640 TCW_4(__kmp_nth, __kmp_nth + 1);
3641
3642 //
3643 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3644 // for low numbers of procs, and method #2 (keyed API call) for higher
3645 // numbers of procs.
3646 //
3647 if ( __kmp_adjust_gtid_mode ) {
3648 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3649 if ( TCR_4(__kmp_gtid_mode) != 2) {
3650 TCW_4(__kmp_gtid_mode, 2);
3651 }
3652 }
3653 else {
3654 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3655 TCW_4(__kmp_gtid_mode, 1);
3656 }
3657 }
3658 }
3659
3660#ifdef KMP_ADJUST_BLOCKTIME
3661 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003662 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003663 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3664 if ( __kmp_nth > __kmp_avail_proc ) {
3665 __kmp_zero_bt = TRUE;
3666 }
3667 }
3668#endif /* KMP_ADJUST_BLOCKTIME */
3669
3670 /* setup this new hierarchy */
3671 if( ! ( root = __kmp_root[gtid] )) {
3672 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3673 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3674 }
3675
3676 __kmp_initialize_root( root );
3677
3678 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003679 if( root->r.r_uber_thread ) {
3680 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003681 } else {
3682 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3683 if ( __kmp_storage_map ) {
3684 __kmp_print_thread_storage_map( root_thread, gtid );
3685 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003686 root_thread->th.th_info .ds.ds_gtid = gtid;
3687 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003688 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003689 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003690 }
3691 #if USE_FAST_MEMORY
3692 __kmp_initialize_fast_memory( root_thread );
3693 #endif /* USE_FAST_MEMORY */
3694
3695 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003696 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003697 __kmp_initialize_bget( root_thread );
3698 #endif
3699 __kmp_init_random( root_thread ); // Initialize random number generator
3700 }
3701
3702 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003703 if( ! root_thread->th.th_serial_team ) {
3704 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003705 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003706
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003707 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003708#if OMPT_SUPPORT
3709 0, // root parallel id
3710#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711#if OMP_40_ENABLED
3712 proc_bind_default,
3713#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003714 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003715 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003717 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003718 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003719 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003720
3721 /* drop root_thread into place */
3722 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3723
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003724 root->r.r_root_team->t.t_threads[0] = root_thread;
3725 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3726 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3727 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
3728 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003729
3730 /* initialize the thread, get it ready to go */
3731 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003732 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003733
3734 /* prepare the master thread for get_gtid() */
3735 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003736
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003737#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003738 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003739#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003740
Jim Cownie5e8470a2013-09-27 10:38:44 +00003741 #ifdef KMP_TDATA_GTID
3742 __kmp_gtid = gtid;
3743 #endif
3744 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3745 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003746
3747 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3748 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003749 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003750 KMP_INIT_BARRIER_STATE ) );
3751 { // Initialize barrier data.
3752 int b;
3753 for ( b = 0; b < bs_last_barrier; ++ b ) {
3754 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003755#if USE_DEBUGGER
3756 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3757#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003758 }; // for
3759 }
3760 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3761
Alp Toker763b9392014-02-28 09:42:41 +00003762#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003763# if OMP_40_ENABLED
3764 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3765 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3766 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3767 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3768# endif
3769
Jim Cownie5e8470a2013-09-27 10:38:44 +00003770 if ( TCR_4(__kmp_init_middle) ) {
3771 __kmp_affinity_set_init_mask( gtid, TRUE );
3772 }
Alp Toker763b9392014-02-28 09:42:41 +00003773#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003774
3775 __kmp_root_counter ++;
3776
3777 KMP_MB();
3778 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3779
3780 return gtid;
3781}
3782
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003783#if KMP_NESTED_HOT_TEAMS
3784static int
3785__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3786{
3787 int i, n, nth;
3788 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3789 if( !hot_teams || !hot_teams[level].hot_team ) {
3790 return 0;
3791 }
3792 KMP_DEBUG_ASSERT( level < max_level );
3793 kmp_team_t *team = hot_teams[level].hot_team;
3794 nth = hot_teams[level].hot_team_nth;
3795 n = nth - 1; // master is not freed
3796 if( level < max_level - 1 ) {
3797 for( i = 0; i < nth; ++i ) {
3798 kmp_info_t *th = team->t.t_threads[i];
3799 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3800 if( i > 0 && th->th.th_hot_teams ) {
3801 __kmp_free( th->th.th_hot_teams );
3802 th->th.th_hot_teams = NULL;
3803 }
3804 }
3805 }
3806 __kmp_free_team( root, team, NULL );
3807 return n;
3808}
3809#endif
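// For illustration (sizes assumed, not taken from this file): if every nested hot
// team were allocated with 4 threads, then with __kmp_hot_teams_max_level == 2 each
// call made from __kmp_reset_root returns 4 - 1 = 3, because only the nested workers
// are freed here (the nested masters are the outer hot team's workers and are reaped
// separately). With __kmp_hot_teams_max_level == 3 the level-1 call also recurses one
// level down and returns 3 + 4 * 3 = 15.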
3810
Jim Cownie5e8470a2013-09-27 10:38:44 +00003811/* Resets a root thread and clears its root and hot teams.
3812 Returns the number of __kmp_threads entries directly and indirectly freed.
3813*/
3814static int
3815__kmp_reset_root(int gtid, kmp_root_t *root)
3816{
3817 kmp_team_t * root_team = root->r.r_root_team;
3818 kmp_team_t * hot_team = root->r.r_hot_team;
3819 int n = hot_team->t.t_nproc;
3820 int i;
3821
3822 KMP_DEBUG_ASSERT( ! root->r.r_active );
3823
3824 root->r.r_root_team = NULL;
3825 root->r.r_hot_team = NULL;
3826 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call
3827 // to __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003828 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3829#if KMP_NESTED_HOT_TEAMS
3830 if( __kmp_hot_teams_max_level > 1 ) { // need to free nested hot teams and their threads if any
3831 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3832 kmp_info_t *th = hot_team->t.t_threads[i];
3833 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3834 if( th->th.th_hot_teams ) {
3835 __kmp_free( th->th.th_hot_teams );
3836 th->th.th_hot_teams = NULL;
3837 }
3838 }
3839 }
3840#endif
3841 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003842
Jim Cownie5e8470a2013-09-27 10:38:44 +00003843 //
3844 // Before we can reap the thread, we need to make certain that all
 3845 // other threads in the teams that had this root as an ancestor have stopped trying to steal tasks.
3846 //
3847 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3848 __kmp_wait_to_unref_task_teams();
3849 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003850
3851 #if KMP_OS_WINDOWS
3852 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3853 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3854 (LPVOID)&(root->r.r_uber_thread->th),
3855 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3856 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3857 #endif /* KMP_OS_WINDOWS */
3858
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003859#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003860 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003861 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3862 int gtid = __kmp_get_gtid();
3863 __ompt_thread_end(ompt_thread_initial, gtid);
3864 }
3865#endif
3866
Jim Cownie5e8470a2013-09-27 10:38:44 +00003867 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3868 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3869
 3870 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3871 root->r.r_uber_thread = NULL;
3872 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003873 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003874
3875 return n;
3876}
3877
3878void
3879__kmp_unregister_root_current_thread( int gtid )
3880{
Jim Cownie77c2a632014-09-03 11:34:33 +00003881 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003882 /* this lock should be ok, since unregister_root_current_thread is never called during
 3883 * an abort, only during a normal close. furthermore, if you have the
3884 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003885
3886 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3887 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3888 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3889 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3890 return;
3891 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003892 kmp_root_t *root = __kmp_root[gtid];
3893
Jim Cownie5e8470a2013-09-27 10:38:44 +00003894 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3895 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3896 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3897 KMP_ASSERT( root->r.r_active == FALSE );
3898
Jim Cownie5e8470a2013-09-27 10:38:44 +00003899
3900 KMP_MB();
3901
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003902#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003903 kmp_info_t * thread = __kmp_threads[gtid];
3904 kmp_team_t * team = thread->th.th_team;
3905 kmp_task_team_t * task_team = thread->th.th_task_team;
3906
3907 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003908 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3909#if OMPT_SUPPORT
3910 // the runtime is shutting down so we won't report any events
3911 thread->th.ompt_thread_info.state = ompt_state_undefined;
3912#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003913 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003914 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003915#endif
3916
Jim Cownie5e8470a2013-09-27 10:38:44 +00003917 __kmp_reset_root(gtid, root);
3918
3919 /* free up this thread slot */
3920 __kmp_gtid_set_specific( KMP_GTID_DNE );
3921#ifdef KMP_TDATA_GTID
3922 __kmp_gtid = KMP_GTID_DNE;
3923#endif
3924
3925 KMP_MB();
3926 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3927
3928 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3929}
3930
Jonathan Peyton2321d572015-06-08 19:25:25 +00003931#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003932/* __kmp_forkjoin_lock must be already held
3933 Unregisters a root thread that is not the current thread. Returns the number of
3934 __kmp_threads entries freed as a result.
3935 */
3936static int
3937__kmp_unregister_root_other_thread( int gtid )
3938{
3939 kmp_root_t *root = __kmp_root[gtid];
3940 int r;
3941
3942 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3943 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3944 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3945 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3946 KMP_ASSERT( root->r.r_active == FALSE );
3947
3948 r = __kmp_reset_root(gtid, root);
3949 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3950 return r;
3951}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003952#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003953
Jim Cownie5e8470a2013-09-27 10:38:44 +00003954#if KMP_DEBUG
3955void __kmp_task_info() {
3956
3957 kmp_int32 gtid = __kmp_entry_gtid();
3958 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3959 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003960 kmp_team_t *steam = this_thr->th.th_serial_team;
3961 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003962
3963 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3964 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3965}
3966#endif // KMP_DEBUG
3967
Jim Cownie5e8470a2013-09-27 10:38:44 +00003968/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003969 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003970 * initialization of features as much as possible */
3971static void
3972__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3973{
3974 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3975 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003976 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003977 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003978 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003979 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003980 KMP_DEBUG_ASSERT( team->t.t_threads );
3981 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3982 KMP_DEBUG_ASSERT( master );
3983 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003984
3985 KMP_MB();
3986
3987 TCW_SYNC_PTR(this_thr->th.th_team, team);
3988
3989 this_thr->th.th_info.ds.ds_tid = tid;
3990 this_thr->th.th_set_nproc = 0;
3991#if OMP_40_ENABLED
3992 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00003993# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00003994 this_thr->th.th_new_place = this_thr->th.th_current_place;
3995# endif
3996#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003997 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003998
3999 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004000 this_thr->th.th_team_nproc = team->t.t_nproc;
4001 this_thr->th.th_team_master = master;
4002 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004003 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4004
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004005 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004006
4007 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4008 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4009
4010 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4011
4012 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4013 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4014 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004015
4016 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004017 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004018
4019 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004020
4021#ifdef BUILD_TV
4022 this_thr->th.th_local.tv_data = 0;
4023#endif
4024
4025 if ( ! this_thr->th.th_pri_common ) {
4026 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4027 if ( __kmp_storage_map ) {
4028 __kmp_print_storage_map_gtid(
4029 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4030 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4031 );
4032 }; // if
4033 this_thr->th.th_pri_head = NULL;
4034 }; // if
4035
4036 /* Initialize dynamic dispatch */
4037 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004038 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004039 /*
4040 * Use team max_nproc since this will never change for the team.
4041 */
4042 size_t disp_size = sizeof( dispatch_private_info_t ) *
Jonathan Peyton067325f2016-05-31 19:01:15 +00004043 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004044 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4045 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004046 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004047 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4048
4049 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004050#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00004051 dispatch->th_doacross_buf_idx = 0;
4052#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004053 if( ! dispatch->th_disp_buffer ) {
4054 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004055
4056 if ( __kmp_storage_map ) {
4057 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
Jonathan Peyton067325f2016-05-31 19:01:15 +00004058 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
Jim Cownie5e8470a2013-09-27 10:38:44 +00004059 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4060 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4061 gtid, team->t.t_id, gtid );
4062 }
4063 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004064 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004065 }
4066
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004067 dispatch->th_dispatch_pr_current = 0;
4068 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004069
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004070 dispatch->th_deo_fcn = 0; /* ORDERED */
4071 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004072 }
4073
4074 this_thr->th.th_next_pool = NULL;
4075
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004076 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004077 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004078 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4079 this_thr->th.th_task_state_top = 0;
4080 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004081 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4082 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004083 }
4084
Jim Cownie5e8470a2013-09-27 10:38:44 +00004085 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4086 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4087
4088 KMP_MB();
4089}
4090
4091
4092/* allocate a new thread for the requesting team. this is only called from within a
4093 * forkjoin critical section. we will first try to get an available thread from the
4094 * thread pool. if none is available, we will fork a new one assuming we are able
4095 * to create a new one. this should be assured, as the caller should check on this
4096 * first.
4097 */
4098kmp_info_t *
4099__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4100{
4101 kmp_team_t *serial_team;
4102 kmp_info_t *new_thr;
4103 int new_gtid;
4104
4105 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4106 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004107#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004108 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004109#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004110 KMP_MB();
4111
4112 /* first, try to get one from the thread pool */
4113 if ( __kmp_thread_pool ) {
4114
4115 new_thr = (kmp_info_t*)__kmp_thread_pool;
4116 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4117 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4118 __kmp_thread_pool_insert_pt = NULL;
4119 }
4120 TCW_4(new_thr->th.th_in_pool, FALSE);
4121 //
4122 // Don't touch th_active_in_pool or th_active.
4123 // The worker thread adjusts those flags as it sleeps/awakens.
4124 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004125 __kmp_thread_pool_nth--;
4126
4127 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4128 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004129 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004130 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4131 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4132
4133 /* setup the thread structure */
4134 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4135 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4136
4137 TCW_4(__kmp_nth, __kmp_nth + 1);
4138
Jonathan Peyton54127982015-11-04 21:37:48 +00004139 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004140 new_thr->th.th_task_state_top = 0;
4141 new_thr->th.th_task_state_stack_sz = 4;
4142
Jim Cownie5e8470a2013-09-27 10:38:44 +00004143#ifdef KMP_ADJUST_BLOCKTIME
 4144 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004145 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004146 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4147 if ( __kmp_nth > __kmp_avail_proc ) {
4148 __kmp_zero_bt = TRUE;
4149 }
4150 }
4151#endif /* KMP_ADJUST_BLOCKTIME */
4152
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004153#if KMP_DEBUG
4154 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
4155 int b;
4156 kmp_balign_t * balign = new_thr->th.th_bar;
4157 for( b = 0; b < bs_last_barrier; ++ b )
4158 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4159#endif
4160
Jim Cownie5e8470a2013-09-27 10:38:44 +00004161 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4162 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4163
4164 KMP_MB();
4165 return new_thr;
4166 }
4167
4168
 4169 /* no, we'll fork a new one */
4170 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4171 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4172
4173 //
4174 // If this is the first worker thread the RTL is creating, then also
4175 // launch the monitor thread. We try to do this as early as possible.
4176 //
4177 if ( ! TCR_4( __kmp_init_monitor ) ) {
4178 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4179 if ( ! TCR_4( __kmp_init_monitor ) ) {
4180 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4181 TCW_4( __kmp_init_monitor, 1 );
4182 __kmp_create_monitor( & __kmp_monitor );
4183 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004184 #if KMP_OS_WINDOWS
4185 // AC: wait until monitor has started. This is a fix for CQ232808.
4186 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
 4187 // work in between, then there is a high probability that the monitor thread starts after
 4188 // the library shutdown. At shutdown it is too late to cope with the problem, because
 4189 // when the master is in DllMain (process detach) the monitor has no chance to start
 4190 // (it is blocked), and the master has no means to inform the monitor that the library has gone,
4191 // because all the memory which the monitor can access is going to be released/reset.
4192 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4193 KMP_YIELD( TRUE );
4194 }
4195 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4196 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004197 }
4198 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4199 }
4200
4201 KMP_MB();
4202 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4203 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4204 }
4205
4206 /* allocate space for it. */
4207 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4208
4209 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4210
4211 if ( __kmp_storage_map ) {
4212 __kmp_print_thread_storage_map( new_thr, new_gtid );
4213 }
4214
4215 /* add the reserve serialized team, initialized from the team's master thread */
4216 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004217 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004218 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004219
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004220 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004221 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004222#if OMPT_SUPPORT
4223 0, // root parallel id
4224#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004225#if OMP_40_ENABLED
4226 proc_bind_default,
4227#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004228 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004229 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004230 }
4231 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004232 serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
4233 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004234 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4235 new_thr ) );
4236
4237 /* setup the thread structures */
4238 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4239
4240 #if USE_FAST_MEMORY
4241 __kmp_initialize_fast_memory( new_thr );
4242 #endif /* USE_FAST_MEMORY */
4243
4244 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004245 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004246 __kmp_initialize_bget( new_thr );
4247 #endif
4248
4249 __kmp_init_random( new_thr ); // Initialize random number generator
4250
4251 /* Initialize these only once when thread is grabbed for a team allocation */
4252 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4253 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4254
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004255 int b;
4256 kmp_balign_t * balign = new_thr->th.th_bar;
4257 for(b=0; b<bs_last_barrier; ++b) {
4258 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4259 balign[b].bb.team = NULL;
4260 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4261 balign[b].bb.use_oncore_barrier = 0;
4262 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004263
4264 new_thr->th.th_spin_here = FALSE;
4265 new_thr->th.th_next_waiting = 0;
4266
Alp Toker98758b02014-03-02 04:12:06 +00004267#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004268 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4269 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4270 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4271 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4272#endif
4273
4274 TCW_4(new_thr->th.th_in_pool, FALSE);
4275 new_thr->th.th_active_in_pool = FALSE;
4276 TCW_4(new_thr->th.th_active, TRUE);
4277
4278 /* adjust the global counters */
4279 __kmp_all_nth ++;
4280 __kmp_nth ++;
4281
4282 //
4283 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4284 // for low numbers of procs, and method #2 (keyed API call) for higher
4285 // numbers of procs.
4286 //
4287 if ( __kmp_adjust_gtid_mode ) {
4288 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4289 if ( TCR_4(__kmp_gtid_mode) != 2) {
4290 TCW_4(__kmp_gtid_mode, 2);
4291 }
4292 }
4293 else {
4294 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4295 TCW_4(__kmp_gtid_mode, 1);
4296 }
4297 }
4298 }
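    // For illustration only, with a hypothetical threshold: if __kmp_tls_gtid_min
    // were 20, the first 19 threads would keep using the cheaper stack-pointer
    // search (gtid mode 1), and creating the 20th thread would switch the whole
    // process to the keyed TLS lookup (gtid mode 2).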
4299
4300#ifdef KMP_ADJUST_BLOCKTIME
4301 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004302 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004303 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4304 if ( __kmp_nth > __kmp_avail_proc ) {
4305 __kmp_zero_bt = TRUE;
4306 }
4307 }
4308#endif /* KMP_ADJUST_BLOCKTIME */
4309
4310 /* actually fork it and create the new worker thread */
4311 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4312 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4313 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4314
Jim Cownie5e8470a2013-09-27 10:38:44 +00004315 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4316 KMP_MB();
4317 return new_thr;
4318}
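
// Illustrative sketch of the allocation policy implemented above (guarded out of
// the build): prefer a pooled thread, otherwise fork a new worker. It omits the
// monitor startup, gtid selection, barrier and affinity setup the real routine
// performs, and the helper name is hypothetical.
#if 0
static kmp_info_t *
__sketch_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
{
    if ( __kmp_thread_pool ) {
        // 1. Reuse a pooled thread: unlink it and rebind it to the requesting team.
        kmp_info_t *thr = (kmp_info_t *) __kmp_thread_pool;
        __kmp_thread_pool = (volatile kmp_info_t *) thr->th.th_next_pool;
        TCW_4( thr->th.th_in_pool, FALSE );
        __kmp_initialize_info( thr, team, new_tid, thr->th.th_info.ds.ds_gtid );
        return thr;
    }
    // 2. Nothing pooled: the caller already checked capacity, so allocate a fresh
    //    kmp_info_t, pick a free gtid and call __kmp_create_worker() as above.
    KMP_DEBUG_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
    return NULL; // placeholder; the real path returns the newly created thread
}
#endif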
4319
4320/*
4321 * reinitialize team for reuse.
4322 *
 4323 * The hot team code calls this routine at every fork barrier, so the EPCC barrier
 4324 * tests are extremely sensitive to changes in it, esp. writes to the team
 4325 * struct, which cause a cache invalidation in all threads.
4326 *
4327 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4328 */
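
// A minimal sketch of the conditional-update idiom used here and by the callers
// (KMP_CHECK_UPDATE): only write a shared team field when the value actually
// changes, so unchanged fork/join paths do not dirty cache lines every worker
// reads. Illustrative only; see kmp.h for the real macro.
#if 0
#define CHECK_UPDATE_SKETCH( dst, src )                                         \
    do {                                                                        \
        if ( (dst) != (src) ) (dst) = (src); /* write only when it changes */   \
    } while (0)
/* e.g. CHECK_UPDATE_SKETCH( team->t.t_ident, loc ); is a no-op on the common
   hot-team path where loc did not change between fork barriers. */
#endif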
4329static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004330__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004331 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4332 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004333 KMP_DEBUG_ASSERT( team && new_icvs);
4334 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004335 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004336
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004337 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004338
Jim Cownie181b4bb2013-12-23 17:28:57 +00004339 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004340 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004341 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004342
4343 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4344 team->t.t_threads[0], team ) );
4345}
4346
Jim Cownie5e8470a2013-09-27 10:38:44 +00004347
4348/* initialize the team data structure
4349 * this assumes the t_threads and t_max_nproc are already set
4350 * also, we don't touch the arguments */
4351static void
4352__kmp_initialize_team(
4353 kmp_team_t * team,
4354 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004355 kmp_internal_control_t * new_icvs,
4356 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004357) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004358 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4359
Jim Cownie5e8470a2013-09-27 10:38:44 +00004360 /* verify */
4361 KMP_DEBUG_ASSERT( team );
4362 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4363 KMP_DEBUG_ASSERT( team->t.t_threads );
4364 KMP_MB();
4365
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004366 team->t.t_master_tid = 0; /* not needed */
4367 /* team->t.t_master_bar; not needed */
4368 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4369 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004370
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004371 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4372 team->t.t_next_pool = NULL;
4373 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004374
4375 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004376 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004377
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004378 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4379 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004380
4381#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004382 team->t.t_fp_control_saved = FALSE; /* not needed */
4383 team->t.t_x87_fpu_control_word = 0; /* not needed */
4384 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004385#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4386
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004387 team->t.t_construct = 0;
4388 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004389
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004390 team->t.t_ordered .dt.t_value = 0;
4391 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004392
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004393 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004394
4395#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004396 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004397#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004398 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004399
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004400 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004401
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004402 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004403
4404 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004405 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004406}
4407
Alp Toker98758b02014-03-02 04:12:06 +00004408#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004409/* Sets full mask for thread and returns old mask, no changes to structures. */
4410static void
4411__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4412{
4413 if ( KMP_AFFINITY_CAPABLE() ) {
4414 int status;
4415 if ( old_mask != NULL ) {
4416 status = __kmp_get_system_affinity( old_mask, TRUE );
4417 int error = errno;
4418 if ( status != 0 ) {
4419 __kmp_msg(
4420 kmp_ms_fatal,
4421 KMP_MSG( ChangeThreadAffMaskError ),
4422 KMP_ERR( error ),
4423 __kmp_msg_null
4424 );
4425 }
4426 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004427 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004428 }
4429}
4430#endif
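
// Intended usage, mirroring __kmp_allocate_team below: the caller allocates a mask,
// widens the master temporarily, creates the workers, then restores and frees the
// saved mask, e.g.
//     kmp_affin_mask_t *old_mask;
//     KMP_CPU_ALLOC( old_mask );
//     __kmp_set_thread_affinity_mask_full_tmp( old_mask );
//     ... __kmp_allocate_thread() for each new worker ...
//     __kmp_set_system_affinity( old_mask, TRUE );
//     KMP_CPU_FREE( old_mask );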
4431
Alp Toker98758b02014-03-02 04:12:06 +00004432#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004433
4434//
4435// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
 4436// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004437// thread's partition, and binds each worker to a thread in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004438// The master thread's partition should already include its current binding.
4439//
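//
// Worked example for the over-subscribed proc_bind_close branch below (numbers
// chosen for illustration): with n_th = 10, n_places = 4 and the master on place 0,
// S = 2, rem = 2 and gap = 2, so an extra thread is parked on every second place and
// the final layout is 3,2,3,2 threads on places 0,1,2,3, with the running 'place'
// cursor ending back at the master's place as the trailing assertion expects.
//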
4440static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004441__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004442{
4443 //
 4444 // Copy the master thread's place partition to the team struct
4445 //
4446 kmp_info_t *master_th = team->t.t_threads[0];
4447 KMP_DEBUG_ASSERT( master_th != NULL );
4448 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4449 int first_place = master_th->th.th_first_place;
4450 int last_place = master_th->th.th_last_place;
4451 int masters_place = master_th->th.th_current_place;
4452 team->t.t_first_place = first_place;
4453 team->t.t_last_place = last_place;
4454
4455 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4456 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4457 masters_place, first_place, last_place ) );
4458
4459 switch ( proc_bind ) {
4460
4461 case proc_bind_default:
4462 //
4463 // serial teams might have the proc_bind policy set to
4464 // proc_bind_default. It doesn't matter, as we don't
4465 // rebind the master thread for any proc_bind policy.
4466 //
4467 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4468 break;
4469
4470 case proc_bind_master:
4471 {
4472 int f;
4473 int n_th = team->t.t_nproc;
4474 for ( f = 1; f < n_th; f++ ) {
4475 kmp_info_t *th = team->t.t_threads[f];
4476 KMP_DEBUG_ASSERT( th != NULL );
4477 th->th.th_first_place = first_place;
4478 th->th.th_last_place = last_place;
4479 th->th.th_new_place = masters_place;
4480
4481 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4482 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4483 team->t.t_id, f, masters_place, first_place, last_place ) );
4484 }
4485 }
4486 break;
4487
4488 case proc_bind_close:
4489 {
4490 int f;
4491 int n_th = team->t.t_nproc;
4492 int n_places;
4493 if ( first_place <= last_place ) {
4494 n_places = last_place - first_place + 1;
4495 }
4496 else {
4497 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4498 }
4499 if ( n_th <= n_places ) {
4500 int place = masters_place;
4501 for ( f = 1; f < n_th; f++ ) {
4502 kmp_info_t *th = team->t.t_threads[f];
4503 KMP_DEBUG_ASSERT( th != NULL );
4504
4505 if ( place == last_place ) {
4506 place = first_place;
4507 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004508 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004509 place = 0;
4510 }
4511 else {
4512 place++;
4513 }
4514 th->th.th_first_place = first_place;
4515 th->th.th_last_place = last_place;
4516 th->th.th_new_place = place;
4517
4518 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4519 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4520 team->t.t_id, f, place, first_place, last_place ) );
4521 }
4522 }
4523 else {
4524 int S, rem, gap, s_count;
4525 S = n_th / n_places;
4526 s_count = 0;
4527 rem = n_th - ( S * n_places );
4528 gap = rem > 0 ? n_places/rem : n_places;
4529 int place = masters_place;
4530 int gap_ct = gap;
4531 for ( f = 0; f < n_th; f++ ) {
4532 kmp_info_t *th = team->t.t_threads[f];
4533 KMP_DEBUG_ASSERT( th != NULL );
4534
4535 th->th.th_first_place = first_place;
4536 th->th.th_last_place = last_place;
4537 th->th.th_new_place = place;
4538 s_count++;
4539
4540 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4541 // do nothing, add an extra thread to place on next iteration
4542 }
4543 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4544 // we added an extra thread to this place; move to next place
4545 if ( place == last_place ) {
4546 place = first_place;
4547 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004548 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004549 place = 0;
4550 }
4551 else {
4552 place++;
4553 }
4554 s_count = 0;
4555 gap_ct = 1;
4556 rem--;
4557 }
4558 else if (s_count == S) { // place full; don't add extra
4559 if ( place == last_place ) {
4560 place = first_place;
4561 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004562 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004563 place = 0;
4564 }
4565 else {
4566 place++;
4567 }
4568 gap_ct++;
4569 s_count = 0;
4570 }
4571
4572 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4573 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4574 team->t.t_id, f, th->th.th_new_place, first_place,
4575 last_place ) );
4576 }
4577 KMP_DEBUG_ASSERT( place == masters_place );
4578 }
4579 }
4580 break;
4581
4582 case proc_bind_spread:
4583 {
4584 int f;
4585 int n_th = team->t.t_nproc;
4586 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004587 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004588 if ( first_place <= last_place ) {
4589 n_places = last_place - first_place + 1;
4590 }
4591 else {
4592 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4593 }
4594 if ( n_th <= n_places ) {
4595 int place = masters_place;
4596 int S = n_places/n_th;
4597 int s_count, rem, gap, gap_ct;
4598 rem = n_places - n_th*S;
4599 gap = rem ? n_th/rem : 1;
4600 gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004601 thidx = n_th;
4602 if (update_master_only == 1)
4603 thidx = 1;
4604 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004605 kmp_info_t *th = team->t.t_threads[f];
4606 KMP_DEBUG_ASSERT( th != NULL );
4607
4608 th->th.th_first_place = place;
4609 th->th.th_new_place = place;
4610 s_count = 1;
4611 while (s_count < S) {
4612 if ( place == last_place ) {
4613 place = first_place;
4614 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004615 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004616 place = 0;
4617 }
4618 else {
4619 place++;
4620 }
4621 s_count++;
4622 }
4623 if (rem && (gap_ct == gap)) {
4624 if ( place == last_place ) {
4625 place = first_place;
4626 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004627 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004628 place = 0;
4629 }
4630 else {
4631 place++;
4632 }
4633 rem--;
4634 gap_ct = 0;
4635 }
4636 th->th.th_last_place = place;
4637 gap_ct++;
4638
4639 if ( place == last_place ) {
4640 place = first_place;
4641 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004642 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004643 place = 0;
4644 }
4645 else {
4646 place++;
4647 }
4648
4649 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4650 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4651 team->t.t_id, f, th->th.th_new_place,
4652 th->th.th_first_place, th->th.th_last_place ) );
4653 }
4654 KMP_DEBUG_ASSERT( place == masters_place );
4655 }
4656 else {
4657 int S, rem, gap, s_count;
4658 S = n_th / n_places;
4659 s_count = 0;
4660 rem = n_th - ( S * n_places );
4661 gap = rem > 0 ? n_places/rem : n_places;
4662 int place = masters_place;
4663 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004664 thidx = n_th;
4665 if (update_master_only == 1)
4666 thidx = 1;
4667 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004668 kmp_info_t *th = team->t.t_threads[f];
4669 KMP_DEBUG_ASSERT( th != NULL );
4670
4671 th->th.th_first_place = place;
4672 th->th.th_last_place = place;
4673 th->th.th_new_place = place;
4674 s_count++;
4675
4676 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4677 // do nothing, add an extra thread to place on next iteration
4678 }
4679 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4680 // we added an extra thread to this place; move on to next place
4681 if ( place == last_place ) {
4682 place = first_place;
4683 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004684 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004685 place = 0;
4686 }
4687 else {
4688 place++;
4689 }
4690 s_count = 0;
4691 gap_ct = 1;
4692 rem--;
4693 }
4694 else if (s_count == S) { // place is full; don't add extra thread
4695 if ( place == last_place ) {
4696 place = first_place;
4697 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004698 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004699 place = 0;
4700 }
4701 else {
4702 place++;
4703 }
4704 gap_ct++;
4705 s_count = 0;
4706 }
4707
4708 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4709 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4710 team->t.t_id, f, th->th.th_new_place,
4711 th->th.th_first_place, th->th.th_last_place) );
4712 }
4713 KMP_DEBUG_ASSERT( place == masters_place );
4714 }
4715 }
4716 break;
4717
4718 default:
4719 break;
4720 }
4721
4722 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4723}
4724
Alp Toker98758b02014-03-02 04:12:06 +00004725#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004726
4727/* allocate a new team data structure to use. take one off of the free pool if available */
4728kmp_team_t *
4729__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004730#if OMPT_SUPPORT
4731 ompt_parallel_id_t ompt_parallel_id,
4732#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004733#if OMP_40_ENABLED
4734 kmp_proc_bind_t new_proc_bind,
4735#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004736 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004737 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004738{
Jonathan Peyton45be4502015-08-11 21:36:41 +00004739 KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004740 int f;
4741 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004742 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004743 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004744
4745 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4746 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4747 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4748 KMP_MB();
4749
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004750#if KMP_NESTED_HOT_TEAMS
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004751 kmp_hot_team_ptr_t *hot_teams;
4752 if( master ) {
4753 team = master->th.th_team;
4754 level = team->t.t_active_level;
4755 if( master->th.th_teams_microtask ) { // in teams construct?
4756 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4757 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4758 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
 4759 ++level; // don't increment if #teams==1 or for the outer fork of the teams; increment otherwise
4760 }
4761 }
4762 hot_teams = master->th.th_hot_teams;
4763 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4764 { // hot team has already been allocated for given level
4765 use_hot_team = 1;
4766 } else {
4767 use_hot_team = 0;
4768 }
4769 }
4770#endif
4771 // Optimization to use a "hot" team
4772 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004773 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004774#if KMP_NESTED_HOT_TEAMS
4775 team = hot_teams[level].hot_team;
4776#else
4777 team = root->r.r_hot_team;
4778#endif
4779#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004780 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004781 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4782 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004783 }
4784#endif
4785
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004786 // Has the number of threads changed?
4787 /* Let's assume the most common case is that the number of threads is unchanged, and
4788 put that case first. */
4789 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4790 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004791 // This case can mean that omp_set_num_threads() was called and the hot team size
4792 // was already reduced, so we check the special flag
4793 if ( team->t.t_size_changed == -1 ) {
4794 team->t.t_size_changed = 1;
4795 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004796 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004797 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004798
4799 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004800 kmp_r_sched_t new_sched = new_icvs->sched;
 4801 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
4802 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004803
4804 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4805
4806 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4807 0, team->t.t_threads[0], team ) );
4808 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4809
4810#if OMP_40_ENABLED
4811# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004812 if ( ( team->t.t_size_changed == 0 )
4813 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004814 if (new_proc_bind == proc_bind_spread) {
4815 __kmp_partition_places(team, 1); // add flag to update only master for spread
4816 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004817 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4818 team->t.t_id, new_proc_bind, team->t.t_first_place,
4819 team->t.t_last_place ) );
4820 }
4821 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004822 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004823 __kmp_partition_places( team );
4824 }
4825# else
4826 if ( team->t.t_proc_bind != new_proc_bind ) {
4827 team->t.t_proc_bind = new_proc_bind;
4828 }
4829# endif /* KMP_AFFINITY_SUPPORTED */
4830#endif /* OMP_40_ENABLED */
4831 }
4832 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004833 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4834
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004835 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004836#if KMP_NESTED_HOT_TEAMS
4837 if( __kmp_hot_teams_mode == 0 ) {
4838 // AC: saved number of threads should correspond to team's value in this mode,
4839 // can be bigger in mode 1, when hot team has some threads in reserve
4840 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4841 hot_teams[level].hot_team_nth = new_nproc;
4842#endif // KMP_NESTED_HOT_TEAMS
4843 /* release the extra threads we don't need any more */
4844 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4845 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004846 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4847 // When decreasing team size, threads no longer in the team should unref task team.
4848 team->t.t_threads[f]->th.th_task_team = NULL;
4849 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004850 __kmp_free_thread( team->t.t_threads[ f ] );
4851 team->t.t_threads[ f ] = NULL;
4852 }
4853#if KMP_NESTED_HOT_TEAMS
4854 } // (__kmp_hot_teams_mode == 0)
4855#endif // KMP_NESTED_HOT_TEAMS
4856 team->t.t_nproc = new_nproc;
4857 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4858 team->t.t_sched = new_icvs->sched;
4859 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004860
Jim Cownie5e8470a2013-09-27 10:38:44 +00004861 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004862 for(f = 0; f < new_nproc; ++f) {
4863 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004864 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004865 // restore the current task state of the master thread: should be the implicit task
4866 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4867 0, team->t.t_threads[0], team ) );
4868
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004869 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004870
4871#ifdef KMP_DEBUG
4872 for ( f = 0; f < team->t.t_nproc; f++ ) {
4873 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4874 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4875 }
4876#endif
4877
4878#if OMP_40_ENABLED
4879 team->t.t_proc_bind = new_proc_bind;
Alp Toker98758b02014-03-02 04:12:06 +00004880# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004881 __kmp_partition_places( team );
4882# endif
4883#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004884 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004885 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004886#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004887 kmp_affin_mask_t *old_mask;
4888 if ( KMP_AFFINITY_CAPABLE() ) {
4889 KMP_CPU_ALLOC(old_mask);
4890 }
4891#endif
4892
4893 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4894
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004895 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004896
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004897#if KMP_NESTED_HOT_TEAMS
4898 int avail_threads = hot_teams[level].hot_team_nth;
4899 if( new_nproc < avail_threads )
4900 avail_threads = new_nproc;
4901 kmp_info_t **other_threads = team->t.t_threads;
4902 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4903 // Adjust barrier data of reserved threads (if any) of the team
4904 // Other data will be set in __kmp_initialize_info() below.
4905 int b;
4906 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4907 for ( b = 0; b < bs_last_barrier; ++ b ) {
4908 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4909 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004910#if USE_DEBUGGER
4911 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4912#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004913 }
4914 }
4915 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4916 // we have all needed threads in reserve, no need to allocate any
 4917 // this is only possible in mode 1; we cannot have reserved threads in mode 0
4918 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4919 team->t.t_nproc = new_nproc; // just get reserved threads involved
4920 } else {
4921 // we may have some threads in reserve, but not enough
4922 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4923 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4924#endif // KMP_NESTED_HOT_TEAMS
4925 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004926 /* reallocate larger arrays */
4927 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004928 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004929 }
4930
Alp Toker98758b02014-03-02 04:12:06 +00004931#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004932 /* Temporarily set full mask for master thread before
4933 creation of workers. The reason is that workers inherit
 4934 the affinity from the master, so if a lot of workers are
 4935 created on a single core quickly, they don't get
4936 a chance to set their own affinity for a long time.
4937 */
4938 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4939#endif
4940
4941 /* allocate new threads for the hot team */
4942 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4943 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4944 KMP_DEBUG_ASSERT( new_worker );
4945 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004946
Jonathan Peytond26e2132015-09-10 18:44:30 +00004947 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004948 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4949 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4950 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4951
4952 { // Initialize barrier data for new threads.
4953 int b;
4954 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004955 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004956 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004957 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004958#if USE_DEBUGGER
4959 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4960#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004961 }
4962 }
4963 }
4964
Alp Toker98758b02014-03-02 04:12:06 +00004965#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004966 if ( KMP_AFFINITY_CAPABLE() ) {
4967 /* Restore initial master thread's affinity mask */
4968 __kmp_set_system_affinity( old_mask, TRUE );
4969 KMP_CPU_FREE(old_mask);
4970 }
4971#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004972#if KMP_NESTED_HOT_TEAMS
4973 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
4974#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004975 /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00004976 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004977 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004978
Jonathan Peytone03b62f2015-10-08 18:49:40 +00004979 /* reinitialize the threads */
4980 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00004981 for (f=0; f < team->t.t_nproc; ++f)
4982 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
4983 if (level) { // set th_task_state for new threads in nested hot team
4984 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00004985 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00004986 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
4987 for (f=old_nproc; f < team->t.t_nproc; ++f)
4988 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004989 }
Jonathan Peyton54127982015-11-04 21:37:48 +00004990 else { // set th_task_state for new threads in non-nested hot team
4991 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
4992 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004993 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004994 }
4995
Jim Cownie5e8470a2013-09-27 10:38:44 +00004996#ifdef KMP_DEBUG
4997 for ( f = 0; f < team->t.t_nproc; ++ f ) {
4998 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4999 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5000 }
5001#endif
5002
5003#if OMP_40_ENABLED
5004 team->t.t_proc_bind = new_proc_bind;
Alp Toker98758b02014-03-02 04:12:06 +00005005# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005006 __kmp_partition_places( team );
5007# endif
5008#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005009 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005010
5011#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005012 kmp_info_t *master = team->t.t_threads[0];
5013 if( master->th.th_teams_microtask ) {
5014 for( f = 1; f < new_nproc; ++f ) {
5015 // propagate teams construct specific info to workers
5016 kmp_info_t *thr = team->t.t_threads[f];
5017 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5018 thr->th.th_teams_level = master->th.th_teams_level;
5019 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005020 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005021 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005022#endif /* OMP_40_ENABLED */
5023#if KMP_NESTED_HOT_TEAMS
5024 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005025 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005026 for( f = 1; f < new_nproc; ++f ) {
5027 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005028 int b;
5029 kmp_balign_t * balign = thr->th.th_bar;
5030 for( b = 0; b < bs_last_barrier; ++ b ) {
5031 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5032 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005033#if USE_DEBUGGER
5034 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5035#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005036 }
5037 }
5038 }
5039#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005040
5041 /* reallocate space for arguments if necessary */
5042 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005043 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005044 //
5045 // The hot team re-uses the previous task team,
5046 // if untouched during the previous release->gather phase.
5047 //
5048
5049 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5050
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005051#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005052 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005053 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5054 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005055 }
5056#endif
5057
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005058#if OMPT_SUPPORT
5059 __ompt_team_assign_id(team, ompt_parallel_id);
5060#endif
5061
Jim Cownie5e8470a2013-09-27 10:38:44 +00005062 KMP_MB();
5063
5064 return team;
5065 }
5066
5067 /* next, let's try to take one from the team pool */
5068 KMP_MB();
5069 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5070 {
5071 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5072 if ( team->t.t_max_nproc >= max_nproc ) {
5073 /* take this team from the team pool */
5074 __kmp_team_pool = team->t.t_next_pool;
5075
5076 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005077 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005078
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005079 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5080 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5081 team->t.t_task_team[0] = NULL;
5082 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005083
5084 /* reallocate space for arguments if necessary */
5085 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005086 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005087
5088 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5089 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5090 { // Initialize barrier data.
5091 int b;
5092 for ( b = 0; b < bs_last_barrier; ++ b) {
5093 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005094#if USE_DEBUGGER
5095 team->t.t_bar[ b ].b_master_arrived = 0;
5096 team->t.t_bar[ b ].b_team_arrived = 0;
5097#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005098 }
5099 }
5100
5101#if OMP_40_ENABLED
5102 team->t.t_proc_bind = new_proc_bind;
5103#endif
5104
5105 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005106
5107#if OMPT_SUPPORT
5108 __ompt_team_assign_id(team, ompt_parallel_id);
5109#endif
5110
Jim Cownie5e8470a2013-09-27 10:38:44 +00005111 KMP_MB();
5112
5113 return team;
5114 }
5115
5116 /* reap team if it is too small, then loop back and check the next one */
5117 /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
5118 /* TODO: Use technique to find the right size hot-team, don't reap them */
5119 team = __kmp_reap_team( team );
5120 __kmp_team_pool = team;
5121 }
5122
5123 /* nothing available in the pool, no matter, make a new team! */
5124 KMP_MB();
5125 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5126
5127 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005128 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005129 /* NOTE well, for some reason allocating one big buffer and dividing it
5130 * up seems to really hurt performance a lot on the P4, so let's not use
5131 * this... */
5132 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005133
5134 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005135 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005136
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005137 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5138 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5139 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5140 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005141
5142 if ( __kmp_storage_map ) {
5143 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5144 }
5145
5146 /* allocate space for arguments */
5147 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005148 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005149
5150 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5151 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5152 { // Initialize barrier data.
5153 int b;
5154 for ( b = 0; b < bs_last_barrier; ++ b ) {
5155 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005156#if USE_DEBUGGER
5157 team->t.t_bar[ b ].b_master_arrived = 0;
5158 team->t.t_bar[ b ].b_team_arrived = 0;
5159#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005160 }
5161 }
5162
5163#if OMP_40_ENABLED
5164 team->t.t_proc_bind = new_proc_bind;
5165#endif
5166
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005167#if OMPT_SUPPORT
5168 __ompt_team_assign_id(team, ompt_parallel_id);
5169 team->t.ompt_serialized_team_info = NULL;
5170#endif
5171
Jim Cownie5e8470a2013-09-27 10:38:44 +00005172 KMP_MB();
5173
5174 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5175
5176 return team;
5177}
5178
5179/* TODO implement hot-teams at all levels */
5180/* TODO implement lazy thread release on demand (disband request) */
5181
5182/* free the team. return it to the team pool. release all the threads
5183 * associated with it */
5184void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005185__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005186{
5187 int f;
5188 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5189
5190 /* verify state */
5191 KMP_DEBUG_ASSERT( root );
5192 KMP_DEBUG_ASSERT( team );
5193 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5194 KMP_DEBUG_ASSERT( team->t.t_threads );
5195
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005196 int use_hot_team = team == root->r.r_hot_team;
5197#if KMP_NESTED_HOT_TEAMS
5198 int level;
5199 kmp_hot_team_ptr_t *hot_teams;
5200 if( master ) {
5201 level = team->t.t_active_level - 1;
5202 if( master->th.th_teams_microtask ) { // in teams construct?
5203 if( master->th.th_teams_size.nteams > 1 ) {
5204 ++level; // level was not increased in teams construct for team_of_masters
5205 }
5206 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5207 master->th.th_teams_level == team->t.t_level ) {
5208 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5209 } // team->t.t_level will be increased inside parallel
5210 }
5211 hot_teams = master->th.th_hot_teams;
5212 if( level < __kmp_hot_teams_max_level ) {
5213 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5214 use_hot_team = 1;
5215 }
5216 }
5217#endif // KMP_NESTED_HOT_TEAMS
5218
Jim Cownie5e8470a2013-09-27 10:38:44 +00005219 /* team is done working */
5220 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005221 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005222 // Do not reset pointer to parent team to NULL for hot teams.
5223
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005224 /* if this is a non-hot team, release our threads */
5225 if( ! use_hot_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005226 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005227 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005228 int tt_idx;
5229 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005230 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5231 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005232 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5233 team->t.t_threads[f]->th.th_task_team = NULL;
5234 }
5235 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005236#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005237 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005238#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005239 team->t.t_task_team[tt_idx] = NULL;
5240 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005241 }
5242 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005243
5244 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005245 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005246 team->t.t_level = 0;
5247 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005248
Jim Cownie5e8470a2013-09-27 10:38:44 +00005249 /* free the worker threads */
5250 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5251 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5252 __kmp_free_thread( team->t.t_threads[ f ] );
5253 team->t.t_threads[ f ] = NULL;
5254 }
5255
Jim Cownie5e8470a2013-09-27 10:38:44 +00005256 /* put the team back in the team pool */
5257 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005258 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005259 __kmp_team_pool = (volatile kmp_team_t*) team;
5260 }
5261
5262 KMP_MB();
5263}
5264
5265
5266/* reap the team. destroy it, reclaim all its resources and free its memory */
5267kmp_team_t *
5268__kmp_reap_team( kmp_team_t *team )
5269{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005270 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005271
5272 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005273 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5274 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5275 KMP_DEBUG_ASSERT( team->t.t_threads );
5276 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005277
5278 /* TODO clean the threads that are a part of this? */
5279
5280 /* free stuff */
5281
5282 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005283 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5284 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005285 __kmp_free( team );
5286
5287 KMP_MB();
5288 return next_pool;
5289}
5290
5291//
5292// Free the thread. Don't reap it, just place it on the pool of available
5293// threads.
5294//
5295// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5296// binding for the affinity mechanism to be useful.
5297//
5298// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5299// However, we want to avoid a potential performance problem by always
5300// scanning through the list to find the correct point at which to insert
5301// the thread (potential N**2 behavior). To do this we keep track of the
5302// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5303// With single-level parallelism, threads will always be added to the tail
5304// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5305// parallelism, all bets are off and we may need to scan through the entire
5306// free list.
5307//
5308// This change also has a potentially large performance benefit, for some
5309// applications. Previously, as threads were freed from the hot team, they
5310// would be placed back on the free list in inverse order. If the hot team
5311// grew back to it's original size, then the freed thread would be placed
5312// back on the hot team in reverse order. This could cause bad cache
5313// locality problems on programs where the size of the hot team regularly
5314// grew and shrunk.
5315//
5316// Now, for single-level parallelism, the OMP tid is always == gtid.
5317//
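//
// A minimal sketch of the sorted-insert idea described above, shown on a
// hypothetical plain singly-linked list keyed by an integer id. This is
// illustrative only and is not part of the runtime: node_t and
// insert_sorted() do not exist in this library.
//
//     typedef struct node { int id; struct node *next; } node_t;
//
//     static void insert_sorted( node_t **head, node_t *n )
//     {
//         node_t **scan = head;                  // address of a link in the list
//         while ( *scan != NULL && (*scan)->id < n->id )
//             scan = &(*scan)->next;             // advance to the next link
//         n->next = *scan;                       // splice in before *scan
//         *scan = n;                             // the scanned link now points at n
//     }
//
// __kmp_free_thread() below applies the same pattern to the gtid-sorted
// __kmp_thread_pool, and additionally caches the last insertion point in
// __kmp_thread_pool_insert_pt so that the common single-level case appends
// at the tail without rescanning from the head.
//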
5318void
5319__kmp_free_thread( kmp_info_t *this_th )
5320{
5321 int gtid;
5322 kmp_info_t **scan;
5323
5324 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5325 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5326
5327 KMP_DEBUG_ASSERT( this_th );
5328
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005329 // When moving a thread to the pool, switch it to wait on its own b_go flag with an uninitialized (NULL) team.
5330 int b;
5331 kmp_balign_t *balign = this_th->th.th_bar;
5332 for (b=0; b<bs_last_barrier; ++b) {
5333 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5334 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5335 balign[b].bb.team = NULL;
5336 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005337 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005338
Jim Cownie5e8470a2013-09-27 10:38:44 +00005339 /* put thread back on the free pool */
5340 TCW_PTR(this_th->th.th_team, NULL);
5341 TCW_PTR(this_th->th.th_root, NULL);
5342 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5343
5344 //
5345 // If the __kmp_thread_pool_insert_pt is already past the new insert
5346 // point, then we need to re-scan the entire list.
5347 //
5348 gtid = this_th->th.th_info.ds.ds_gtid;
5349 if ( __kmp_thread_pool_insert_pt != NULL ) {
5350 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5351 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5352 __kmp_thread_pool_insert_pt = NULL;
5353 }
5354 }
5355
5356 //
5357 // Scan down the list to find the place to insert the thread.
5358 // scan is the address of a link in the list, possibly the address of
5359 // __kmp_thread_pool itself.
5360 //
5361 // In the absence of nested parallelism, the for loop will have 0 iterations.
5362 //
5363 if ( __kmp_thread_pool_insert_pt != NULL ) {
5364 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5365 }
5366 else {
5367 scan = (kmp_info_t **)&__kmp_thread_pool;
5368 }
5369 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5370 scan = &( (*scan)->th.th_next_pool ) );
5371
5372 //
5373 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5374 // to its address.
5375 //
5376 TCW_PTR(this_th->th.th_next_pool, *scan);
5377 __kmp_thread_pool_insert_pt = *scan = this_th;
5378 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5379 || ( this_th->th.th_info.ds.ds_gtid
5380 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5381 TCW_4(this_th->th.th_in_pool, TRUE);
5382 __kmp_thread_pool_nth++;
5383
5384 TCW_4(__kmp_nth, __kmp_nth - 1);
5385
5386#ifdef KMP_ADJUST_BLOCKTIME
5387 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005388 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005389 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5390 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5391 if ( __kmp_nth <= __kmp_avail_proc ) {
5392 __kmp_zero_bt = FALSE;
5393 }
5394 }
5395#endif /* KMP_ADJUST_BLOCKTIME */
5396
5397 KMP_MB();
5398}
5399
Jim Cownie5e8470a2013-09-27 10:38:44 +00005400
Jim Cownie5e8470a2013-09-27 10:38:44 +00005401/* ------------------------------------------------------------------------ */
5402
5403void *
5404__kmp_launch_thread( kmp_info_t *this_thr )
5405{
5406 int gtid = this_thr->th.th_info.ds.ds_gtid;
5407/* void *stack_data;*/
5408 kmp_team_t *(*volatile pteam);
5409
5410 KMP_MB();
5411 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5412
5413 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005414 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005415 }
5416
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005417#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005418 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005419 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5420 this_thr->th.ompt_thread_info.wait_id = 0;
5421 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005422 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005423 __ompt_thread_begin(ompt_thread_worker, gtid);
5424 }
5425 }
5426#endif
5427
Jim Cownie5e8470a2013-09-27 10:38:44 +00005428 /* This is the place where threads wait for work */
5429 while( ! TCR_4(__kmp_global.g.g_done) ) {
5430 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5431 KMP_MB();
5432
5433 /* wait for work to do */
5434 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5435
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005436#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005437 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005438 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5439 }
5440#endif
5441
Jim Cownie5e8470a2013-09-27 10:38:44 +00005442 /* No tid yet since not part of a team */
5443 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5444
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005445#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005446 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005447 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5448 }
5449#endif
5450
Jim Cownie5e8470a2013-09-27 10:38:44 +00005451 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5452
5453 /* have we been allocated? */
5454 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005455#if OMPT_SUPPORT
5456 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005457 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005458 if (ompt_enabled) {
5459 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005460 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005461 }
5462#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005463 /* we were just woken up, so run our new task */
5464 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5465 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005466 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5467 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005468
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005469 updateHWFPControl (*pteam);
5470
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005471#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005472 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005473 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005474 // Initialize OMPT task id for implicit task.
5475 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005476 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005477 }
5478#endif
5479
Jonathan Peyton45be4502015-08-11 21:36:41 +00005480 KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005481 {
Jonathan Peyton45be4502015-08-11 21:36:41 +00005482 KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005483 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5484 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005485 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005486 }
Jonathan Peyton45be4502015-08-11 21:36:41 +00005487 KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005488 KMP_ASSERT( rc );
5489
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005490#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005491 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005492 /* no frame set while outside task */
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005493 task_info->frame.exit_runtime_frame = 0;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005494
5495 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5496 }
5497#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005498 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005499 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5500 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005501 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005502 /* join barrier after parallel region */
5503 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005504#if OMPT_SUPPORT && OMPT_TRACE
5505 if (ompt_enabled) {
5506 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005507 // don't access *pteam here: it may have already been freed
5508 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005509 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5510 my_parallel_id, task_info->task_id);
5511 }
5512 task_info->frame.exit_runtime_frame = 0;
5513 task_info->task_id = 0;
5514 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005515#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005516 }
5517 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005518 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005519
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005520#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005521 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005522 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5523 __ompt_thread_end(ompt_thread_worker, gtid);
5524 }
5525#endif
5526
Jonathan Peyton54127982015-11-04 21:37:48 +00005527 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005528 /* run the destructors for the threadprivate data for this thread */
5529 __kmp_common_destroy_gtid( gtid );
5530
5531 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5532 KMP_MB();
5533 return this_thr;
5534}
5535
5536/* ------------------------------------------------------------------------ */
5537/* ------------------------------------------------------------------------ */
5538
Jim Cownie5e8470a2013-09-27 10:38:44 +00005539void
5540__kmp_internal_end_dest( void *specific_gtid )
5541{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005542 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005543 #pragma warning( push )
5544 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5545 #endif
5546 // Make sure no significant bits are lost
5547 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005548 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005549 #pragma warning( pop )
5550 #endif
5551
5552 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5553 /* NOTE: the gtid is stored as gtid+1 in the thread-local storage;
5554 * this is because 0 is reserved for the nothing-stored case */
5555
5556 /* josh: One reason for setting the gtid specific data even when it is being
5557 destroyed by pthread is to allow gtid lookup through thread specific data
5558 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5559 that gets executed in the call to __kmp_internal_end_thread, actually
5560 gets the gtid through the thread specific data. Setting it here seems
5561 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5562 to run smoothly.
5563 todo: get rid of this after we remove the dependence on
5564 __kmp_gtid_get_specific
5565 */
5566 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5567 __kmp_gtid_set_specific( gtid );
5568 #ifdef KMP_TDATA_GTID
5569 __kmp_gtid = gtid;
5570 #endif
5571 __kmp_internal_end_thread( gtid );
5572}
5573
Jonathan Peyton99016992015-05-26 17:32:53 +00005574#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005575
5576// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005577// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005578// option in makefile.mk works fine.
5579
5580__attribute__(( destructor ))
5581void
5582__kmp_internal_end_dtor( void )
5583{
5584 __kmp_internal_end_atexit();
5585}
5586
5587void
5588__kmp_internal_end_fini( void )
5589{
5590 __kmp_internal_end_atexit();
5591}
5592
5593#endif
5594
5595/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5596void
5597__kmp_internal_end_atexit( void )
5598{
5599 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5600 /* [Windows]
5601 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5602 stat code that depends on thread specific data for gtid fails because that data becomes
5603 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5604 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5605 stat code and use __kmp_internal_end_library to cleanly shutdown the library.
5606
5607// TODO: Can some of this comment about GVS be removed?
5608 I suspect that the offending stat code is executed when the calling thread tries to
5609 clean up a dead root thread's data structures, resulting in GVS code trying to close
5610 the GVS structures for that thread, but since the stat code uses
5611 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5612 cleaning up itself instead of another thread, it gets confused. This happens because
5613 allowing a thread to unregister and cleanup another thread is a recent modification for
5614 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5615 thread may end up trying to unregister another thread only if thread death does not
5616 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5617 specific data destructor function to detect thread death. For Windows dynamic, there
5618 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5619 workaround is applicable only for Windows static stat library.
5620 */
5621 __kmp_internal_end_library( -1 );
5622 #if KMP_OS_WINDOWS
5623 __kmp_close_console();
5624 #endif
5625}
5626
5627static void
5628__kmp_reap_thread(
5629 kmp_info_t * thread,
5630 int is_root
5631) {
5632
Alp Toker8f2d3f02014-02-24 10:40:15 +00005633 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005634
5635 int gtid;
5636
5637 KMP_DEBUG_ASSERT( thread != NULL );
5638
5639 gtid = thread->th.th_info.ds.ds_gtid;
5640
5641 if ( ! is_root ) {
5642
5643 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5644 /* Assume the threads are at the fork barrier here */
5645 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5646 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005647 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5648 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005649 }; // if
5650
Jim Cownie5e8470a2013-09-27 10:38:44 +00005651 // Terminate OS thread.
5652 __kmp_reap_worker( thread );
5653
5654 //
5655 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005656 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005657 //
5658 // There is a small timing hole here - if the worker thread was
5659 // just waking up after sleeping in the pool, had reset its
5660 // th_active_in_pool flag but not decremented the global counter
5661 // __kmp_thread_pool_active_nth yet, then the global counter
5662 // might not get updated.
5663 //
5664 // Currently, this can only happen as the library is unloaded,
5665 // so there are no harmful side effects.
5666 //
5667 if ( thread->th.th_active_in_pool ) {
5668 thread->th.th_active_in_pool = FALSE;
5669 KMP_TEST_THEN_DEC32(
5670 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5671 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5672 }
5673
5674 // Decrement # of [worker] threads in the pool.
5675 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5676 --__kmp_thread_pool_nth;
5677 }; // if
5678
5679 // Free the fast memory for tasking
5680 #if USE_FAST_MEMORY
5681 __kmp_free_fast_memory( thread );
5682 #endif /* USE_FAST_MEMORY */
5683
5684 __kmp_suspend_uninitialize_thread( thread );
5685
5686 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5687 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5688
5689 -- __kmp_all_nth;
5690 // __kmp_nth was decremented when thread is added to the pool.
5691
5692#ifdef KMP_ADJUST_BLOCKTIME
5693 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005694 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005695 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5696 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5697 if ( __kmp_nth <= __kmp_avail_proc ) {
5698 __kmp_zero_bt = FALSE;
5699 }
5700 }
5701#endif /* KMP_ADJUST_BLOCKTIME */
5702
5703 /* free the memory being used */
5704 if( __kmp_env_consistency_check ) {
5705 if ( thread->th.th_cons ) {
5706 __kmp_free_cons_stack( thread->th.th_cons );
5707 thread->th.th_cons = NULL;
5708 }; // if
5709 }
5710
5711 if ( thread->th.th_pri_common != NULL ) {
5712 __kmp_free( thread->th.th_pri_common );
5713 thread->th.th_pri_common = NULL;
5714 }; // if
5715
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005716 if (thread->th.th_task_state_memo_stack != NULL) {
5717 __kmp_free(thread->th.th_task_state_memo_stack);
5718 thread->th.th_task_state_memo_stack = NULL;
5719 }
5720
Jim Cownie5e8470a2013-09-27 10:38:44 +00005721 #if KMP_USE_BGET
5722 if ( thread->th.th_local.bget_data != NULL ) {
5723 __kmp_finalize_bget( thread );
5724 }; // if
5725 #endif
5726
Alp Toker98758b02014-03-02 04:12:06 +00005727#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005728 if ( thread->th.th_affin_mask != NULL ) {
5729 KMP_CPU_FREE( thread->th.th_affin_mask );
5730 thread->th.th_affin_mask = NULL;
5731 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005732#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005733
5734 __kmp_reap_team( thread->th.th_serial_team );
5735 thread->th.th_serial_team = NULL;
5736 __kmp_free( thread );
5737
5738 KMP_MB();
5739
5740} // __kmp_reap_thread
5741
5742static void
5743__kmp_internal_end(void)
5744{
5745 int i;
5746
5747 /* First, unregister the library */
5748 __kmp_unregister_library();
5749
5750 #if KMP_OS_WINDOWS
5751 /* In Win static library, we can't tell when a root actually dies, so we
5752 reclaim the data structures for any root threads that have died but not
5753 unregistered themselves, in order to shut down cleanly.
5754 In Win dynamic library we also can't tell when a thread dies.
5755 */
5756 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5757 #endif
5758
5759 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5760 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005761 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005762 break;
5763 KMP_MB(); /* Flush all pending memory write invalidates. */
5764 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5765
5766 if ( i < __kmp_threads_capacity ) {
5767 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5768 KMP_MB(); /* Flush all pending memory write invalidates. */
5769
5770 //
5771 // Need to check that monitor was initialized before reaping it.
5772 // If we are called from __kmp_atfork_child (which sets
5773 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5774 // contain valid data, but it is only valid in the parent process,
5775 // not the child.
5776 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005777 // New behavior (201008): instead of keying off of the flag
5778 // __kmp_init_parallel, the monitor thread creation is keyed off
5779 // of the new flag __kmp_init_monitor.
5780 //
5781 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5782 if ( TCR_4( __kmp_init_monitor ) ) {
5783 __kmp_reap_monitor( & __kmp_monitor );
5784 TCW_4( __kmp_init_monitor, 0 );
5785 }
5786 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5787 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
5788 } else {
5789 /* TODO move this to cleanup code */
5790 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005791 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005792 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5793 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005794// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005795 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005796 }
5797 }
5798 #endif
5799
5800 KMP_MB();
5801
5802 // Reap the worker threads.
5803 // This is valid for now, but be careful if threads are reaped sooner.
5804 while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool.
5805 // Get the next thread from the pool.
5806 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5807 __kmp_thread_pool = thread->th.th_next_pool;
5808 // Reap it.
5809 thread->th.th_next_pool = NULL;
5810 thread->th.th_in_pool = FALSE;
5811 __kmp_reap_thread( thread, 0 );
5812 }; // while
5813 __kmp_thread_pool_insert_pt = NULL;
5814
5815 // Reap teams.
5816 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
5817 // Get the next team from the pool.
5818 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5819 __kmp_team_pool = team->t.t_next_pool;
5820 // Reap it.
5821 team->t.t_next_pool = NULL;
5822 __kmp_reap_team( team );
5823 }; // while
5824
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005825 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005826
5827 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5828 // TBD: Add some checking...
5829 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5830 }
5831
5832 /* Make sure all threadprivate destructors get run by joining with all worker
5833 threads before resetting this flag */
5834 TCW_SYNC_4(__kmp_init_common, FALSE);
5835
5836 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5837 KMP_MB();
5838
5839 //
5840 // See note above: One of the possible fixes for CQ138434 / CQ140126
5841 //
5842 // FIXME: push both code fragments down and CSE them?
5843 // push them into __kmp_cleanup() ?
5844 //
5845 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5846 if ( TCR_4( __kmp_init_monitor ) ) {
5847 __kmp_reap_monitor( & __kmp_monitor );
5848 TCW_4( __kmp_init_monitor, 0 );
5849 }
5850 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5851 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
5852
5853 } /* else !__kmp_global.t_active */
5854 TCW_4(__kmp_init_gtid, FALSE);
5855 KMP_MB(); /* Flush all pending memory write invalidates. */
5856
Jim Cownie5e8470a2013-09-27 10:38:44 +00005857 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005858#if OMPT_SUPPORT
5859 ompt_fini();
5860#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005861}
5862
5863void
5864__kmp_internal_end_library( int gtid_req )
5865{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005866 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5867 /* this shouldn't be a race condition because __kmp_internal_end() is the
5868 * only place to clear __kmp_init_serial */
5869 /* we'll check this later too, after we get the lock */
5870 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundaant,
5871 // because the next check will work in any case.
5872 if( __kmp_global.g.g_abort ) {
5873 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5874 /* TODO abort? */
5875 return;
5876 }
5877 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5878 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5879 return;
5880 }
5881
5882
5883 KMP_MB(); /* Flush all pending memory write invalidates. */
5884
5885 /* find out who we are and what we should do */
5886 {
5887 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5888 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5889 if( gtid == KMP_GTID_SHUTDOWN ) {
5890 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5891 return;
5892 } else if( gtid == KMP_GTID_MONITOR ) {
5893 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5894 return;
5895 } else if( gtid == KMP_GTID_DNE ) {
5896 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5897 /* we don't know who we are, but we may still shutdown the library */
5898 } else if( KMP_UBER_GTID( gtid )) {
5899 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005900 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005901 __kmp_global.g.g_abort = -1;
5902 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5903 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5904 return;
5905 } else {
5906 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5907 __kmp_unregister_root_current_thread( gtid );
5908 }
5909 } else {
5910 /* worker threads may call this function through the atexit handler, if they call exit() */
5911 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5912 TODO: do a thorough shutdown instead
5913 */
5914 #ifdef DUMP_DEBUG_ON_EXIT
5915 if ( __kmp_debug_buf )
5916 __kmp_dump_debug_buffer( );
5917 #endif
5918 return;
5919 }
5920 }
5921 /* synchronize the termination process */
5922 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5923
5924 /* have we already finished */
5925 if( __kmp_global.g.g_abort ) {
5926 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5927 /* TODO abort? */
5928 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5929 return;
5930 }
5931 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5932 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5933 return;
5934 }
5935
5936 /* We need this lock to enforce mutex between this reading of
5937 __kmp_threads_capacity and the writing by __kmp_register_root.
5938 Alternatively, we can use a counter of roots that is
5939 atomically updated by __kmp_get_global_thread_id_reg,
5940 __kmp_do_serial_initialize and __kmp_internal_end_*.
5941 */
5942 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5943
5944 /* now we can safely conduct the actual termination */
5945 __kmp_internal_end();
5946
5947 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5948 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5949
5950 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
5951
5952 #ifdef DUMP_DEBUG_ON_EXIT
5953 if ( __kmp_debug_buf )
5954 __kmp_dump_debug_buffer();
5955 #endif
5956
5957 #if KMP_OS_WINDOWS
5958 __kmp_close_console();
5959 #endif
5960
5961 __kmp_fini_allocator();
5962
5963} // __kmp_internal_end_library
5964
5965void
5966__kmp_internal_end_thread( int gtid_req )
5967{
5968 int i;
5969
5970 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5971 /* this shouldn't be a race condition because __kmp_internal_end() is the
5972 * only place to clear __kmp_init_serial */
5973 /* we'll check this later too, after we get the lock */
5974 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5975 // because the next check will work in any case.
5976 if( __kmp_global.g.g_abort ) {
5977 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
5978 /* TODO abort? */
5979 return;
5980 }
5981 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5982 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
5983 return;
5984 }
5985
5986 KMP_MB(); /* Flush all pending memory write invalidates. */
5987
5988 /* find out who we are and what we should do */
5989 {
5990 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5991 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
5992 if( gtid == KMP_GTID_SHUTDOWN ) {
5993 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
5994 return;
5995 } else if( gtid == KMP_GTID_MONITOR ) {
5996 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
5997 return;
5998 } else if( gtid == KMP_GTID_DNE ) {
5999 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6000 return;
6001 /* we don't know who we are */
6002 } else if( KMP_UBER_GTID( gtid )) {
6003 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006004 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006005 __kmp_global.g.g_abort = -1;
6006 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6007 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6008 return;
6009 } else {
6010 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6011 __kmp_unregister_root_current_thread( gtid );
6012 }
6013 } else {
6014 /* just a worker thread, let's leave */
6015 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6016
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006017 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006018 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006019 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006020
6021 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6022 return;
6023 }
6024 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006025 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006026 // AC: let's not shut down the Linux* OS dynamic library at the exit of the uber thread,
6027 // because it is better to shut down later, in the library destructor.
6028 // The reason for this change is a performance problem when a non-OpenMP thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006029 // in a loop forks and joins many OpenMP threads. We can save a lot of time
6030 // keeping worker threads alive until the program shutdown.
6031 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6032 // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006033 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006034 return;
6035 #endif
6036 /* synchronize the termination process */
6037 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6038
6039 /* have we already finished */
6040 if( __kmp_global.g.g_abort ) {
6041 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6042 /* TODO abort? */
6043 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6044 return;
6045 }
6046 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6047 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6048 return;
6049 }
6050
6051 /* We need this lock to enforce mutex between this reading of
6052 __kmp_threads_capacity and the writing by __kmp_register_root.
6053 Alternatively, we can use a counter of roots that is
6054 atomically updated by __kmp_get_global_thread_id_reg,
6055 __kmp_do_serial_initialize and __kmp_internal_end_*.
6056 */
6057
6058 /* should we finish the run-time? are all siblings done? */
6059 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6060
6061 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6062 if ( KMP_UBER_GTID( i ) ) {
6063 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6064 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6065 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6066 return;
6067 };
6068 }
6069
6070 /* now we can safely conduct the actual termination */
6071
6072 __kmp_internal_end();
6073
6074 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6075 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6076
Jim Cownie77c2a632014-09-03 11:34:33 +00006077 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006078
6079 #ifdef DUMP_DEBUG_ON_EXIT
6080 if ( __kmp_debug_buf )
6081 __kmp_dump_debug_buffer();
6082 #endif
6083} // __kmp_internal_end_thread
6084
6085// -------------------------------------------------------------------------------------------------
6086// Library registration stuff.
6087
6088static long __kmp_registration_flag = 0;
6089 // Random value used to indicate library initialization.
6090static char * __kmp_registration_str = NULL;
6091 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6092
6093
6094static inline
6095char *
6096__kmp_reg_status_name() {
6097 /*
6098 On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
6099 If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
6100 the name of the registered_lib_env env var cannot be found, because the name will contain a different pid.
6101 */
6102 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6103} // __kmp_reg_status_name
6104
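// For illustration only (the pid and hex values below are made up): with a
// pid of 12345 the function above would produce the name
// "__KMP_REGISTERED_LIB_12345", and __kmp_register_library_startup() below
// stores a value built with the "%p-%lx-%s" format, e.g.
//
//     __KMP_REGISTERED_LIB_12345=0x7f3a2c001020-cafe1234-<KMP_LIBRARY_FILE>
//
// i.e. the address of __kmp_registration_flag, the registration flag value,
// and the library file name.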
6105
6106void
6107__kmp_register_library_startup(
6108 void
6109) {
6110
6111 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6112 int done = 0;
6113 union {
6114 double dtime;
6115 long ltime;
6116 } time;
6117 #if KMP_OS_WINDOWS
6118 __kmp_initialize_system_tick();
6119 #endif
6120 __kmp_read_system_time( & time.dtime );
6121 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6122 __kmp_registration_str =
6123 __kmp_str_format(
6124 "%p-%lx-%s",
6125 & __kmp_registration_flag,
6126 __kmp_registration_flag,
6127 KMP_LIBRARY_FILE
6128 );
6129
6130 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6131
6132 while ( ! done ) {
6133
6134 char * value = NULL; // Actual value of the environment variable.
6135
6136 // Set the environment variable, but do not overwrite it if it already exists.
6137 __kmp_env_set( name, __kmp_registration_str, 0 );
6138 // Check whether the variable was actually written.
6139 value = __kmp_env_get( name );
6140 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6141
6142 done = 1; // Ok, environment variable set successfully, exit the loop.
6143
6144 } else {
6145
6146 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6147 // Check whether it is alive or dead.
6148 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6149 char * tail = value;
6150 char * flag_addr_str = NULL;
6151 char * flag_val_str = NULL;
6152 char const * file_name = NULL;
6153 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6154 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6155 file_name = tail;
6156 if ( tail != NULL ) {
6157 long * flag_addr = 0;
6158 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006159 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6160 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006161 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6162 // First, check whether the environment-encoded address is mapped into the address space.
6163 // If so, dereference it to see if it still has the right value.
6164
6165 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6166 neighbor = 1;
6167 } else {
6168 // If not, then we know the other copy of the library is no longer running.
6169 neighbor = 2;
6170 }; // if
6171 }; // if
6172 }; // if
6173 switch ( neighbor ) {
6174 case 0 : // Cannot parse environment variable -- neighbor status unknown.
6175 // Assume it is the incompatible format of a future version of the library.
6176 // Assume the other library is alive.
6177 // WARN( ... ); // TODO: Issue a warning.
6178 file_name = "unknown library";
6179 // Attention! Falling through to the next case. That's intentional.
6180 case 1 : { // Neighbor is alive.
6181 // Check it is allowed.
6182 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6183 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6184 // That's not allowed. Issue fatal error.
6185 __kmp_msg(
6186 kmp_ms_fatal,
6187 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6188 KMP_HNT( DuplicateLibrary ),
6189 __kmp_msg_null
6190 );
6191 }; // if
6192 KMP_INTERNAL_FREE( duplicate_ok );
6193 __kmp_duplicate_library_ok = 1;
6194 done = 1; // Exit the loop.
6195 } break;
6196 case 2 : { // Neighbor is dead.
6197 // Clear the variable and try to register library again.
6198 __kmp_env_unset( name );
6199 } break;
6200 default : {
6201 KMP_DEBUG_ASSERT( 0 );
6202 } break;
6203 }; // switch
6204
6205 }; // if
6206 KMP_INTERNAL_FREE( (void *) value );
6207
6208 }; // while
6209 KMP_INTERNAL_FREE( (void *) name );
6210
6211} // func __kmp_register_library_startup
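// A minimal sketch of the liveness test performed above when the variable was
// already set by another copy of the runtime (illustrative only; the helper
// name below is hypothetical, while __kmp_is_address_mapped() is the real
// check used in the loop above):
//
//     static int example_neighbor_alive( long * flag_addr, long flag_val )
//     {
//         // The other copy is considered alive only if the address it
//         // encoded in the env var is still mapped in this address space
//         // and still holds the random flag value written at startup; a
//         // registration left behind by a dead process fails this test and
//         // the variable is simply cleared and rewritten.
//         return __kmp_is_address_mapped( flag_addr ) && *flag_addr == flag_val;
//     }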
6212
6213
6214void
6215__kmp_unregister_library( void ) {
6216
6217 char * name = __kmp_reg_status_name();
6218 char * value = __kmp_env_get( name );
6219
6220 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6221 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6222 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6223 // Ok, this is our variable. Delete it.
6224 __kmp_env_unset( name );
6225 }; // if
6226
6227 KMP_INTERNAL_FREE( __kmp_registration_str );
6228 KMP_INTERNAL_FREE( value );
6229 KMP_INTERNAL_FREE( name );
6230
6231 __kmp_registration_flag = 0;
6232 __kmp_registration_str = NULL;
6233
6234} // __kmp_unregister_library
6235
6236
6237// End of Library registration stuff.
6238// -------------------------------------------------------------------------------------------------
6239
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006240#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6241
6242static void __kmp_check_mic_type()
6243{
6244 kmp_cpuid_t cpuid_state = {0};
6245 kmp_cpuid_t * cs_p = &cpuid_state;
Jonathan Peyton7be075332015-06-22 15:53:50 +00006246 __kmp_x86_cpuid(1, 0, cs_p);
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006247 // We don't support mic1 at the moment
6248 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6249 __kmp_mic_type = mic2;
6250 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6251 __kmp_mic_type = mic3;
6252 } else {
6253 __kmp_mic_type = non_mic;
6254 }
6255}
6256
6257#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
6258
Jim Cownie5e8470a2013-09-27 10:38:44 +00006259static void
6260__kmp_do_serial_initialize( void )
6261{
6262 int i, gtid;
6263 int size;
6264
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006265 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006266
6267 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6268 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6269 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6270 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6271 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6272
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006273#if OMPT_SUPPORT
6274 ompt_pre_init();
6275#endif
6276
Jim Cownie5e8470a2013-09-27 10:38:44 +00006277 __kmp_validate_locks();
6278
6279 /* Initialize internal memory allocator */
6280 __kmp_init_allocator();
6281
6282 /* Register the library startup via an environment variable
6283 and check to see whether another copy of the library is already
6284 registered. */
6285
6286 __kmp_register_library_startup( );
6287
6288 /* TODO reinitialization of library */
6289 if( TCR_4(__kmp_global.g.g_done) ) {
6290 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6291 }
6292
6293 __kmp_global.g.g_abort = 0;
6294 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6295
6296 /* initialize the locks */
6297#if KMP_USE_ADAPTIVE_LOCKS
6298#if KMP_DEBUG_ADAPTIVE_LOCKS
6299 __kmp_init_speculative_stats();
6300#endif
6301#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006302#if KMP_STATS_ENABLED
6303 __kmp_init_tas_lock( & __kmp_stats_lock );
6304#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006305 __kmp_init_lock( & __kmp_global_lock );
6306 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6307 __kmp_init_lock( & __kmp_debug_lock );
6308 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6309 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6310 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6311 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6312 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6313 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6314 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6315 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6316 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6317 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6318 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6319 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6320 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6321 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6322 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6323 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6324 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6325
6326 /* conduct initialization and initial setup of configuration */
6327
6328 __kmp_runtime_initialize();
6329
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006330#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6331 __kmp_check_mic_type();
6332#endif
6333
Jim Cownie5e8470a2013-09-27 10:38:44 +00006334 // Some global variable initialization moved here from kmp_env_initialize()
6335#ifdef KMP_DEBUG
6336 kmp_diag = 0;
6337#endif
6338 __kmp_abort_delay = 0;
6339
6340 // From __kmp_init_dflt_team_nth()
6341 /* assume the entire machine will be used */
6342 __kmp_dflt_team_nth_ub = __kmp_xproc;
6343 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6344 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6345 }
6346 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6347 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6348 }
6349 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006350
6351 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6352 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6353 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6354 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6355 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6356 __kmp_library = library_throughput;
6357 // From KMP_SCHEDULE initialization
6358 __kmp_static = kmp_sch_static_balanced;
6359 // AC: do not use analytical here, because it is non-monotonic
6360 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006361 //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006362 // Barrier initialization. Moved here from the barrier branch bit control and barrier method
 6363 // control parts of __kmp_env_initialize()
6364 #if KMP_FAST_REDUCTION_BARRIER
6365 #define kmp_reduction_barrier_gather_bb ((int)1)
6366 #define kmp_reduction_barrier_release_bb ((int)1)
6367 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6368 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6369 #endif // KMP_FAST_REDUCTION_BARRIER
6370 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6371 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6372 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6373 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6374 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6375 #if KMP_FAST_REDUCTION_BARRIER
6376 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6377 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6378 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6379 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6380 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6381 }
6382 #endif // KMP_FAST_REDUCTION_BARRIER
6383 }
6384 #if KMP_FAST_REDUCTION_BARRIER
6385 #undef kmp_reduction_barrier_release_pat
6386 #undef kmp_reduction_barrier_gather_pat
6387 #undef kmp_reduction_barrier_release_bb
6388 #undef kmp_reduction_barrier_gather_bb
6389 #endif // KMP_FAST_REDUCTION_BARRIER
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006390#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006391 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006392 // AC: plain=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006393 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006394 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6395 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6396 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6397 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006398#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006399 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006400 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6401 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6402 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006403#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006404#endif
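// Worked example (interpretation of the settings above; the hyper barrier uses a
// fan-out of 1 << branch_bits): the reduction-barrier default of 1 gives 2-way
// gathers/releases, while the KNC plain-barrier gather setting of 3 gives 8-way
// gathers.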
Jim Cownie5e8470a2013-09-27 10:38:44 +00006405
6406 // From KMP_CHECKS initialization
6407#ifdef KMP_DEBUG
6408 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6409#else
6410 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6411#endif
6412
6413 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6414 __kmp_foreign_tp = TRUE;
6415
6416 __kmp_global.g.g_dynamic = FALSE;
6417 __kmp_global.g.g_dynamic_mode = dynamic_default;
6418
6419 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006420
Jim Cownie5e8470a2013-09-27 10:38:44 +00006421 // Print all messages in message catalog for testing purposes.
6422 #ifdef KMP_DEBUG
6423 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6424 if ( __kmp_str_match_true( val ) ) {
6425 kmp_str_buf_t buffer;
6426 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006427 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006428 __kmp_printf( "%s", buffer.str );
6429 __kmp_str_buf_free( & buffer );
6430 }; // if
6431 __kmp_env_free( & val );
6432 #endif
6433
Jim Cownie181b4bb2013-12-23 17:28:57 +00006434 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006435 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6436 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6437
Jim Cownie5e8470a2013-09-27 10:38:44 +00006438 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6439 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6440 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6441 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6442 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6443 __kmp_thread_pool = NULL;
6444 __kmp_thread_pool_insert_pt = NULL;
6445 __kmp_team_pool = NULL;
6446
6447 /* Allocate all of the variable sized records */
6448 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6449 /* Since allocation is cache-aligned, just add extra padding at the end */
6450 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6451 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6452 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6453
6454 /* init thread counts */
6455 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
6456 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
6457 __kmp_all_nth = 0;
6458 __kmp_nth = 0;
6459
6460 /* setup the uber master thread and hierarchy */
6461 gtid = __kmp_register_root( TRUE );
6462 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6463 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6464 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6465
6466 KMP_MB(); /* Flush all pending memory write invalidates. */
6467
6468 __kmp_common_initialize();
6469
6470 #if KMP_OS_UNIX
6471 /* invoke the child fork handler */
6472 __kmp_register_atfork();
6473 #endif
6474
Jonathan Peyton99016992015-05-26 17:32:53 +00006475 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006476 {
6477 /* Invoke the exit handler when the program finishes, only for static library.
6478 For dynamic library, we already have _fini and DllMain.
6479 */
6480 int rc = atexit( __kmp_internal_end_atexit );
6481 if ( rc != 0 ) {
6482 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6483 }; // if
6484 }
6485 #endif
6486
6487 #if KMP_HANDLE_SIGNALS
6488 #if KMP_OS_UNIX
6489 /* NOTE: make sure that this is called before the user installs
6490 * their own signal handlers so that the user handlers
 6491 * are called first. This way they can return false,
6492 * not call our handler, avoid terminating the library,
6493 * and continue execution where they left off. */
6494 __kmp_install_signals( FALSE );
6495 #endif /* KMP_OS_UNIX */
6496 #if KMP_OS_WINDOWS
6497 __kmp_install_signals( TRUE );
6498 #endif /* KMP_OS_WINDOWS */
6499 #endif
6500
6501 /* we have finished the serial initialization */
6502 __kmp_init_counter ++;
6503
6504 __kmp_init_serial = TRUE;
6505
6506 if (__kmp_settings) {
6507 __kmp_env_print();
6508 }
6509
6510#if OMP_40_ENABLED
6511 if (__kmp_display_env || __kmp_display_env_verbose) {
6512 __kmp_env_print_2();
6513 }
6514#endif // OMP_40_ENABLED
6515
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006516#if OMPT_SUPPORT
6517 ompt_post_init();
6518#endif
6519
Jim Cownie5e8470a2013-09-27 10:38:44 +00006520 KMP_MB();
6521
6522 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6523}
6524
6525void
6526__kmp_serial_initialize( void )
6527{
6528 if ( __kmp_init_serial ) {
6529 return;
6530 }
6531 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6532 if ( __kmp_init_serial ) {
6533 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6534 return;
6535 }
6536 __kmp_do_serial_initialize();
6537 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6538}
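
/* Illustrative sketch (not compiled): __kmp_serial_initialize() above uses the
   check / acquire / re-check idiom so that racing first callers run
   __kmp_do_serial_initialize() exactly once. The stand-alone sketch below shows
   the same idiom with hypothetical names (my_init_done, my_init_lock,
   my_ensure_initialized); it assumes the KMP_BOOTSTRAP_LOCK_INITIALIZER macro
   from kmp_lock.h. */
#if 0
static volatile int         my_init_done = 0;   /* hypothetical "already done" flag */
static kmp_bootstrap_lock_t my_init_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( my_init_lock );

static void
my_ensure_initialized( void )
{
    if ( my_init_done ) {                        /* fast path: no lock once done */
        return;
    }
    __kmp_acquire_bootstrap_lock( &my_init_lock );
    if ( ! my_init_done ) {                      /* re-check while holding the lock */
        /* one-time setup would go here */
        my_init_done = 1;
    }
    __kmp_release_bootstrap_lock( &my_init_lock );
}
#endif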
6539
6540static void
6541__kmp_do_middle_initialize( void )
6542{
6543 int i, j;
6544 int prev_dflt_team_nth;
6545
6546 if( !__kmp_init_serial ) {
6547 __kmp_do_serial_initialize();
6548 }
6549
6550 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6551
6552 //
6553 // Save the previous value for the __kmp_dflt_team_nth so that
6554 // we can avoid some reinitialization if it hasn't changed.
6555 //
6556 prev_dflt_team_nth = __kmp_dflt_team_nth;
6557
Alp Toker98758b02014-03-02 04:12:06 +00006558#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006559 //
6560 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6561 // number of cores on the machine.
6562 //
6563 __kmp_affinity_initialize();
6564
6565 //
6566 // Run through the __kmp_threads array and set the affinity mask
6567 // for each root thread that is currently registered with the RTL.
6568 //
6569 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6570 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6571 __kmp_affinity_set_init_mask( i, TRUE );
6572 }
6573 }
Alp Toker98758b02014-03-02 04:12:06 +00006574#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006575
6576 KMP_ASSERT( __kmp_xproc > 0 );
6577 if ( __kmp_avail_proc == 0 ) {
6578 __kmp_avail_proc = __kmp_xproc;
6579 }
6580
6581 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
6582 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006583 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006584 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6585 j++;
6586 }
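    // Worked example (hypothetical values): with OMP_NUM_THREADS=",,2,3" and
    // __kmp_avail_proc == 8, the two leading empty slots become 8, so the
    // per-level list ends up as 8,8,2,3 and __kmp_dflt_team_nth(_ub) == 8.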
6587
6588 if ( __kmp_dflt_team_nth == 0 ) {
6589#ifdef KMP_DFLT_NTH_CORES
6590 //
6591 // Default #threads = #cores
6592 //
6593 __kmp_dflt_team_nth = __kmp_ncores;
6594 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6595 __kmp_dflt_team_nth ) );
6596#else
6597 //
6598 // Default #threads = #available OS procs
6599 //
6600 __kmp_dflt_team_nth = __kmp_avail_proc;
6601 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6602 __kmp_dflt_team_nth ) );
6603#endif /* KMP_DFLT_NTH_CORES */
6604 }
6605
6606 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6607 __kmp_dflt_team_nth = KMP_MIN_NTH;
6608 }
6609 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6610 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6611 }
6612
6613 //
6614 // There's no harm in continuing if the following check fails,
6615 // but it indicates an error in the previous logic.
6616 //
6617 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6618
6619 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6620 //
6621 // Run through the __kmp_threads array and set the num threads icv
6622 // for each root thread that is currently registered with the RTL
6623 // (which has not already explicitly set its nthreads-var with a
6624 // call to omp_set_num_threads()).
6625 //
6626 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6627 kmp_info_t *thread = __kmp_threads[ i ];
6628 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006629 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006630
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006631 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006632 }
6633 }
6634 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6635 __kmp_dflt_team_nth) );
6636
6637#ifdef KMP_ADJUST_BLOCKTIME
6638 /* Adjust blocktime to zero if necessary */
6639 /* now that __kmp_avail_proc is set */
6640 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6641 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6642 if ( __kmp_nth > __kmp_avail_proc ) {
6643 __kmp_zero_bt = TRUE;
6644 }
6645 }
6646#endif /* KMP_ADJUST_BLOCKTIME */
6647
6648 /* we have finished middle initialization */
6649 TCW_SYNC_4(__kmp_init_middle, TRUE);
6650
6651 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6652}
6653
6654void
6655__kmp_middle_initialize( void )
6656{
6657 if ( __kmp_init_middle ) {
6658 return;
6659 }
6660 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6661 if ( __kmp_init_middle ) {
6662 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6663 return;
6664 }
6665 __kmp_do_middle_initialize();
6666 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6667}
6668
6669void
6670__kmp_parallel_initialize( void )
6671{
6672 int gtid = __kmp_entry_gtid(); // this might be a new root
6673
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006674 /* synchronize parallel initialization (for sibling) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006675 if( TCR_4(__kmp_init_parallel) ) return;
6676 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6677 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6678
6679 /* TODO reinitialization after we have already shut down */
6680 if( TCR_4(__kmp_global.g.g_done) ) {
6681 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6682 __kmp_infinite_loop();
6683 }
6684
6685 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6686 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6687 */
6688 if( !__kmp_init_middle ) {
6689 __kmp_do_middle_initialize();
6690 }
6691
6692 /* begin initialization */
6693 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6694 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6695
6696#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6697 //
6698 // Save the FP control regs.
6699 // Worker threads will set theirs to these values at thread startup.
6700 //
6701 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6702 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6703 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6704#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6705
6706#if KMP_OS_UNIX
6707# if KMP_HANDLE_SIGNALS
6708 /* must be after __kmp_serial_initialize */
6709 __kmp_install_signals( TRUE );
6710# endif
6711#endif
6712
6713 __kmp_suspend_initialize();
6714
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006715#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006716 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6717 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6718 }
6719#else
6720 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6721 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6722 }
6723#endif
6724
6725 if ( __kmp_version ) {
6726 __kmp_print_version_2();
6727 }
6728
Jim Cownie5e8470a2013-09-27 10:38:44 +00006729 /* we have finished parallel initialization */
6730 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6731
6732 KMP_MB();
6733 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6734
6735 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6736}
6737
6738
6739/* ------------------------------------------------------------------------ */
6740
6741void
6742__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6743 kmp_team_t *team )
6744{
6745 kmp_disp_t *dispatch;
6746
6747 KMP_MB();
6748
6749 /* none of the threads have encountered any constructs, yet. */
6750 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006751#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006752 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006753#endif /* KMP_CACHE_MANAGE */
6754 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6755 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006756 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6757 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006758
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006759 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006760#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00006761 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6762#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006763 if( __kmp_env_consistency_check )
6764 __kmp_push_parallel( gtid, team->t.t_ident );
6765
6766 KMP_MB(); /* Flush all pending memory write invalidates. */
6767}
6768
6769void
6770__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6771 kmp_team_t *team )
6772{
6773 if( __kmp_env_consistency_check )
6774 __kmp_pop_parallel( gtid, team->t.t_ident );
6775}
6776
6777int
6778__kmp_invoke_task_func( int gtid )
6779{
6780 int rc;
6781 int tid = __kmp_tid_from_gtid( gtid );
6782 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006783 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006784
6785 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6786#if USE_ITT_BUILD
6787 if ( __itt_stack_caller_create_ptr ) {
6788 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6789 }
6790#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006791#if INCLUDE_SSC_MARKS
6792 SSC_MARK_INVOKING();
6793#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006794
6795#if OMPT_SUPPORT
6796 void *dummy;
6797 void **exit_runtime_p;
6798 ompt_task_id_t my_task_id;
6799 ompt_parallel_id_t my_parallel_id;
6800
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006801 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006802 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6803 ompt_task_info.frame.exit_runtime_frame);
6804 } else {
6805 exit_runtime_p = &dummy;
6806 }
6807
6808#if OMPT_TRACE
6809 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6810 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006811 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006812 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6813 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6814 my_parallel_id, my_task_id);
6815 }
6816#endif
6817#endif
6818
Jonathan Peyton45be4502015-08-11 21:36:41 +00006819 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006820 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6821 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006822 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6823 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006824#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006825 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006826#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006827 );
6828 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006829
Jim Cownie5e8470a2013-09-27 10:38:44 +00006830#if USE_ITT_BUILD
6831 if ( __itt_stack_caller_create_ptr ) {
6832 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6833 }
6834#endif /* USE_ITT_BUILD */
6835 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6836
6837 return rc;
6838}
6839
6840#if OMP_40_ENABLED
6841void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006842__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006843{
 6844 // This routine is called by all master threads in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006845 kmp_info_t *thr = __kmp_threads[ gtid ];
6846 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006847 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006848 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6849 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6850 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006851 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006852 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006853 // Launch the league of teams now, but do not let workers execute
6854 // (they hang on fork barrier until next parallel)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006855#if INCLUDE_SSC_MARKS
6856 SSC_MARK_FORKING();
6857#endif
6858 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006859 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006860#if OMPT_SUPPORT
6861 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6862#endif
6863 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006864 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6865 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006866#if INCLUDE_SSC_MARKS
6867 SSC_MARK_JOINING();
6868#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006869
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006870 // AC: last parameter "1" eliminates the join barrier, which won't work because
6871 // worker threads are in a fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006872 __kmp_join_call( loc, gtid
6873#if OMPT_SUPPORT
6874 , fork_context_intel
6875#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006876 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006877}
6878
6879int
6880__kmp_invoke_teams_master( int gtid )
6881{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006882 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6883 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006884 #if KMP_DEBUG
6885 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6886 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6887 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006888 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6889 __kmp_teams_master( gtid );
6890 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006891 return 1;
6892}
6893#endif /* OMP_40_ENABLED */
6894
6895/* this sets the requested number of threads for the next parallel region
6896 * encountered by this team */
 6897 /* since this should be enclosed in the forkjoin critical section, it
 6898  * should avoid race conditions with asymmetrical nested parallelism */
6899
6900void
6901__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6902{
6903 kmp_info_t *thr = __kmp_threads[gtid];
6904
6905 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006906 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006907}
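
/* Illustrative sketch (not compiled): a directive such as
       #pragma omp parallel num_threads(4)
   typically reaches this routine via the __kmpc_push_num_threads() entry point
   just before the fork. In the sketch below, "loc" stands for the compiler-
   generated ident_t and "outlined_body" for the outlined microtask; both are
   hypothetical names used only for illustration. */
#if 0
/* ...as emitted in the calling function... */
__kmpc_push_num_threads( &loc, __kmpc_global_thread_num( &loc ), 4 );
__kmpc_fork_call( &loc, 0, (kmpc_micro) outlined_body );
#endif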
6908
6909#if OMP_40_ENABLED
6910
6911/* this sets the requested number of teams for the teams region and/or
6912 * the number of threads for the next parallel region encountered */
6913void
6914__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6915{
6916 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006917 KMP_DEBUG_ASSERT(num_teams >= 0);
6918 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006919
6920 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006921 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006922 if( num_teams > __kmp_max_nth ) { // too many teams requested?
6923 if ( !__kmp_reserve_warn ) {
6924 __kmp_reserve_warn = 1;
6925 __kmp_msg(
6926 kmp_ms_warning,
6927 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6928 KMP_HNT( Unset_ALL_THREADS ),
6929 __kmp_msg_null
6930 );
6931 }
6932 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006933 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006934 // Set number of teams (number of threads in the outer "parallel" of the teams)
6935 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6936
6937 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006938 if( num_threads == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006939 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006940 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006941 num_threads = __kmp_avail_proc / num_teams;
6942 if( num_teams * num_threads > __kmp_max_nth ) {
6943 // adjust num_threads w/o warning as it is not user setting
6944 num_threads = __kmp_max_nth / num_teams;
6945 }
6946 } else {
6947 if( num_teams * num_threads > __kmp_max_nth ) {
6948 int new_threads = __kmp_max_nth / num_teams;
6949 if ( !__kmp_reserve_warn ) { // user asked for too many threads
6950 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
6951 __kmp_msg(
6952 kmp_ms_warning,
6953 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6954 KMP_HNT( Unset_ALL_THREADS ),
6955 __kmp_msg_null
6956 );
6957 }
6958 num_threads = new_threads;
6959 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006960 }
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006961 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006962}
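
/* Worked example (hypothetical values): with __kmp_avail_proc == 16 and
   __kmp_max_nth == 64, "num_teams(4)" with no thread_limit gives
   num_threads = 16 / 4 = 4, and 4 teams * 4 threads = 16 <= 64, so nothing is
   clamped. Had the product exceeded __kmp_max_nth, num_threads would have been
   reduced to __kmp_max_nth / num_teams, as coded above. */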
6963
6964
6965//
6966// Set the proc_bind var to use in the following parallel region.
6967//
6968void
6969__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
6970{
6971 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006972 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006973}
6974
6975#endif /* OMP_40_ENABLED */
6976
6977/* Launch the worker threads into the microtask. */
6978
6979void
6980__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
6981{
6982 kmp_info_t *this_thr = __kmp_threads[gtid];
6983
6984#ifdef KMP_DEBUG
6985 int f;
6986#endif /* KMP_DEBUG */
6987
6988 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006989 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006990 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
6991 KMP_MB(); /* Flush all pending memory write invalidates. */
6992
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006993 team->t.t_construct = 0; /* no single directives seen yet */
6994 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006995
6996 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006997 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006998 if ( team->t.t_max_nproc > 1 ) {
6999 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00007000 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007001 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007002#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007003 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7004#endif
7005 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007006 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007007 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007008#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007009 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7010#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007011 }
7012
7013 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007014 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007015
7016#ifdef KMP_DEBUG
7017 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7018 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7019 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7020 }
7021#endif /* KMP_DEBUG */
7022
7023 /* release the worker threads so they may begin working */
7024 __kmp_fork_barrier( gtid, 0 );
7025}
7026
7027
7028void
7029__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7030{
7031 kmp_info_t *this_thr = __kmp_threads[gtid];
7032
7033 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007034 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007035 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7036 KMP_MB(); /* Flush all pending memory write invalidates. */
7037
7038 /* Join barrier after fork */
7039
7040#ifdef KMP_DEBUG
7041 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7042 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7043 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7044 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7045 __kmp_print_structure();
7046 }
7047 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7048 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7049#endif /* KMP_DEBUG */
7050
7051 __kmp_join_barrier( gtid ); /* wait for everyone */
7052
7053 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007054 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007055}
7056
7057
7058/* ------------------------------------------------------------------------ */
7059/* ------------------------------------------------------------------------ */
7060
7061#ifdef USE_LOAD_BALANCE
7062
7063//
7064// Return the worker threads actively spinning in the hot team, if we
7065// are at the outermost level of parallelism. Otherwise, return 0.
7066//
7067static int
7068__kmp_active_hot_team_nproc( kmp_root_t *root )
7069{
7070 int i;
7071 int retval;
7072 kmp_team_t *hot_team;
7073
7074 if ( root->r.r_active ) {
7075 return 0;
7076 }
7077 hot_team = root->r.r_hot_team;
7078 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7079 return hot_team->t.t_nproc - 1; // Don't count master thread
7080 }
7081
7082 //
7083 // Skip the master thread - it is accounted for elsewhere.
7084 //
7085 retval = 0;
7086 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7087 if ( hot_team->t.t_threads[i]->th.th_active ) {
7088 retval++;
7089 }
7090 }
7091 return retval;
7092}
7093
7094//
7095// Perform an automatic adjustment to the number of
7096// threads used by the next parallel region.
7097//
7098static int
7099__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7100{
7101 int retval;
7102 int pool_active;
7103 int hot_team_active;
7104 int team_curr_active;
7105 int system_active;
7106
7107 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7108 root, set_nproc ) );
7109 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007110 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007111 KMP_DEBUG_ASSERT( set_nproc > 1 );
7112
7113 if ( set_nproc == 1) {
7114 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7115 return 1;
7116 }
7117
7118 //
7119 // Threads that are active in the thread pool, active in the hot team
7120 // for this particular root (if we are at the outer par level), and
7121 // the currently executing thread (to become the master) are available
7122 // to add to the new team, but are currently contributing to the system
7123 // load, and must be accounted for.
7124 //
7125 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7126 hot_team_active = __kmp_active_hot_team_nproc( root );
7127 team_curr_active = pool_active + hot_team_active + 1;
7128
7129 //
7130 // Check the system load.
7131 //
7132 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7133 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7134 system_active, pool_active, hot_team_active ) );
7135
7136 if ( system_active < 0 ) {
7137 //
7138 // There was an error reading the necessary info from /proc,
7139 // so use the thread limit algorithm instead. Once we set
7140 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7141 // we shouldn't wind up getting back here.
7142 //
7143 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7144 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7145
7146 //
7147 // Make this call behave like the thread limit algorithm.
7148 //
7149 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7150 : root->r.r_hot_team->t.t_nproc);
7151 if ( retval > set_nproc ) {
7152 retval = set_nproc;
7153 }
7154 if ( retval < KMP_MIN_NTH ) {
7155 retval = KMP_MIN_NTH;
7156 }
7157
7158 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7159 return retval;
7160 }
7161
7162 //
7163 // There is a slight delay in the load balance algorithm in detecting
7164 // new running procs. The real system load at this instant should be
 7165 // at least as large as the number of active OMP threads that are available to
7166 // add to the team.
7167 //
7168 if ( system_active < team_curr_active ) {
7169 system_active = team_curr_active;
7170 }
7171 retval = __kmp_avail_proc - system_active + team_curr_active;
7172 if ( retval > set_nproc ) {
7173 retval = set_nproc;
7174 }
7175 if ( retval < KMP_MIN_NTH ) {
7176 retval = KMP_MIN_NTH;
7177 }
7178
7179 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7180 return retval;
7181} // __kmp_load_balance_nproc()
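
/* Worked example (hypothetical values): with __kmp_avail_proc == 16,
   pool_active == 2 and hot_team_active == 5 (so team_curr_active == 8), and a
   reported system_active of 10, the routine computes 16 - 10 + 8 = 14, which is
   then clamped to at most set_nproc and at least KMP_MIN_NTH. */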
7182
7183#endif /* USE_LOAD_BALANCE */
7184
Jim Cownie5e8470a2013-09-27 10:38:44 +00007185/* ------------------------------------------------------------------------ */
7186/* ------------------------------------------------------------------------ */
7187
7188/* NOTE: this is called with the __kmp_init_lock held */
7189void
7190__kmp_cleanup( void )
7191{
7192 int f;
7193
7194 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7195
7196 if (TCR_4(__kmp_init_parallel)) {
7197#if KMP_HANDLE_SIGNALS
7198 __kmp_remove_signals();
7199#endif
7200 TCW_4(__kmp_init_parallel, FALSE);
7201 }
7202
7203 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007204#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007205 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007206#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007207 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007208 TCW_4(__kmp_init_middle, FALSE);
7209 }
7210
7211 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7212
7213 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007214 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007215 __kmp_init_serial = FALSE;
7216 }
7217
7218 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7219 if ( __kmp_root[ f ] != NULL ) {
7220 __kmp_free( __kmp_root[ f ] );
7221 __kmp_root[ f ] = NULL;
7222 }
7223 }
7224 __kmp_free( __kmp_threads );
7225 // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in
7226 // freeing __kmp_root.
7227 __kmp_threads = NULL;
7228 __kmp_root = NULL;
7229 __kmp_threads_capacity = 0;
7230
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007231#if KMP_USE_DYNAMIC_LOCK
7232 __kmp_cleanup_indirect_user_locks();
7233#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007234 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007235#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007236
Alp Toker98758b02014-03-02 04:12:06 +00007237 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007238 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7239 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007240 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007241
7242 #if KMP_USE_ADAPTIVE_LOCKS
7243 #if KMP_DEBUG_ADAPTIVE_LOCKS
7244 __kmp_print_speculative_stats();
7245 #endif
7246 #endif
7247 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7248 __kmp_nested_nth.nth = NULL;
7249 __kmp_nested_nth.size = 0;
7250 __kmp_nested_nth.used = 0;
7251
7252 __kmp_i18n_catclose();
7253
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007254#if KMP_STATS_ENABLED
7255 __kmp_accumulate_stats_at_exit();
7256 __kmp_stats_list.deallocate();
7257#endif
7258
Jim Cownie5e8470a2013-09-27 10:38:44 +00007259 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7260}
7261
7262/* ------------------------------------------------------------------------ */
7263/* ------------------------------------------------------------------------ */
7264
7265int
7266__kmp_ignore_mppbeg( void )
7267{
7268 char *env;
7269
7270 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7271 if (__kmp_str_match_false( env ))
7272 return FALSE;
7273 }
7274 // By default __kmpc_begin() is no-op.
7275 return TRUE;
7276}
7277
7278int
7279__kmp_ignore_mppend( void )
7280{
7281 char *env;
7282
7283 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7284 if (__kmp_str_match_false( env ))
7285 return FALSE;
7286 }
7287 // By default __kmpc_end() is no-op.
7288 return TRUE;
7289}
7290
7291void
7292__kmp_internal_begin( void )
7293{
7294 int gtid;
7295 kmp_root_t *root;
7296
7297 /* this is a very important step as it will register new sibling threads
7298 * and assign these new uber threads a new gtid */
7299 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007300 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007301 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7302
7303 if( root->r.r_begin ) return;
7304 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7305 if( root->r.r_begin ) {
7306 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7307 return;
7308 }
7309
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007310 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007311
7312 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7313}
7314
7315
7316/* ------------------------------------------------------------------------ */
7317/* ------------------------------------------------------------------------ */
7318
7319void
7320__kmp_user_set_library (enum library_type arg)
7321{
7322 int gtid;
7323 kmp_root_t *root;
7324 kmp_info_t *thread;
7325
7326 /* first, make sure we are initialized so we can get our gtid */
7327
7328 gtid = __kmp_entry_gtid();
7329 thread = __kmp_threads[ gtid ];
7330
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007331 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007332
7333 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7334 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7335 KMP_WARNING( SetLibraryIncorrectCall );
7336 return;
7337 }
7338
7339 switch ( arg ) {
7340 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007341 thread->th.th_set_nproc = 0;
7342 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007343 break;
7344 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007345 thread->th.th_set_nproc = 0;
7346 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007347 break;
7348 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007349 thread->th.th_set_nproc = 0;
7350 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007351 break;
7352 default:
7353 KMP_FATAL( UnknownLibraryType, arg );
7354 }
7355
7356 __kmp_aux_set_library ( arg );
7357}
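
/* Illustrative sketch (not compiled): the library mode normally reaches this
   routine either from the KMP_LIBRARY environment variable (serial, turnaround,
   throughput) or from the kmp_set_library* extension entry points; the exact
   prototypes exposed by omp.h should be treated as an assumption here. */
#if 0
#include <omp.h>
int main( void )
{
    kmp_set_library_throughput();   /* intended to match KMP_LIBRARY=throughput */
    #pragma omp parallel
    { /* ... */ }
    return 0;
}
#endif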
7358
7359void
7360__kmp_aux_set_stacksize( size_t arg )
7361{
7362 if (! __kmp_init_serial)
7363 __kmp_serial_initialize();
7364
7365#if KMP_OS_DARWIN
7366 if (arg & (0x1000 - 1)) {
7367 arg &= ~(0x1000 - 1);
7368 if(arg + 0x1000) /* check for overflow if we round up */
7369 arg += 0x1000;
7370 }
7371#endif
7372 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7373
7374 /* only change the default stacksize before the first parallel region */
7375 if (! TCR_4(__kmp_init_parallel)) {
7376 size_t value = arg; /* argument is in bytes */
7377
7378 if (value < __kmp_sys_min_stksize )
7379 value = __kmp_sys_min_stksize ;
7380 else if (value > KMP_MAX_STKSIZE)
7381 value = KMP_MAX_STKSIZE;
7382
7383 __kmp_stksize = value;
7384
7385 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7386 }
7387
7388 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7389}
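
/* Worked example (hypothetical value, KMP_OS_DARWIN path): a request of 0x2100
   bytes is not 4K-aligned, so it is masked down to 0x2000 and then bumped to
   0x3000; the result is finally clamped to the
   [__kmp_sys_min_stksize, KMP_MAX_STKSIZE] range before being stored in
   __kmp_stksize. */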
7390
7391/* set the behaviour of the runtime library */
7392/* TODO this can cause some odd behaviour with sibling parallelism... */
7393void
7394__kmp_aux_set_library (enum library_type arg)
7395{
7396 __kmp_library = arg;
7397
7398 switch ( __kmp_library ) {
7399 case library_serial :
7400 {
7401 KMP_INFORM( LibraryIsSerial );
7402 (void) __kmp_change_library( TRUE );
7403 }
7404 break;
7405 case library_turnaround :
7406 (void) __kmp_change_library( TRUE );
7407 break;
7408 case library_throughput :
7409 (void) __kmp_change_library( FALSE );
7410 break;
7411 default:
7412 KMP_FATAL( UnknownLibraryType, arg );
7413 }
7414}
7415
7416/* ------------------------------------------------------------------------ */
7417/* ------------------------------------------------------------------------ */
7418
7419void
7420__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7421{
7422 int blocktime = arg; /* argument is in milliseconds */
7423 int bt_intervals;
7424 int bt_set;
7425
7426 __kmp_save_internal_controls( thread );
7427
7428 /* Normalize and set blocktime for the teams */
7429 if (blocktime < KMP_MIN_BLOCKTIME)
7430 blocktime = KMP_MIN_BLOCKTIME;
7431 else if (blocktime > KMP_MAX_BLOCKTIME)
7432 blocktime = KMP_MAX_BLOCKTIME;
7433
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007434 set__blocktime_team( thread->th.th_team, tid, blocktime );
7435 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007436
7437 /* Calculate and set blocktime intervals for the teams */
7438 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7439
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007440 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7441 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007442
7443 /* Set whether blocktime has been set to "TRUE" */
7444 bt_set = TRUE;
7445
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007446 set__bt_set_team( thread->th.th_team, tid, bt_set );
7447 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007448 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7449 __kmp_gtid_from_tid(tid, thread->th.th_team),
7450 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
7451}
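
/* Illustrative sketch (not compiled): the blocktime normally reaches this
   routine from the KMP_BLOCKTIME environment variable or from the
   kmp_set_blocktime() extension (its declaration in this runtime's omp.h is
   assumed here). A typical application-side use: */
#if 0
#include <omp.h>
int main( void )
{
    kmp_set_blocktime( 0 );   /* idle workers sleep immediately instead of spin-waiting */
    #pragma omp parallel
    { /* ... */ }
    return 0;
}
#endif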
7452
7453void
7454__kmp_aux_set_defaults(
7455 char const * str,
7456 int len
7457) {
7458 if ( ! __kmp_init_serial ) {
7459 __kmp_serial_initialize();
7460 };
7461 __kmp_env_initialize( str );
7462
7463 if (__kmp_settings
7464#if OMP_40_ENABLED
7465 || __kmp_display_env || __kmp_display_env_verbose
7466#endif // OMP_40_ENABLED
7467 ) {
7468 __kmp_env_print();
7469 }
7470} // __kmp_aux_set_defaults
7471
7472/* ------------------------------------------------------------------------ */
7473
7474/*
7475 * internal fast reduction routines
7476 */
7477
Jim Cownie5e8470a2013-09-27 10:38:44 +00007478PACKED_REDUCTION_METHOD_T
7479__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7480 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7481 kmp_critical_name *lck )
7482{
7483
7484 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7485 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7486 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
7487 // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT.
7488
7489 PACKED_REDUCTION_METHOD_T retval;
7490
7491 int team_size;
7492
7493 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7494 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7495
7496 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7497 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7498
7499 retval = critical_reduce_block;
7500
 7501 team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic dereference ) is slower
7502
7503 if( team_size == 1 ) {
7504
7505 retval = empty_reduce_block;
7506
7507 } else {
7508
7509 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7510 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7511
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007512 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007513
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007514 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007515
7516 int teamsize_cutoff = 4;
7517
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007518#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7519 if( __kmp_mic_type != non_mic ) {
7520 teamsize_cutoff = 8;
7521 }
7522#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007523 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007524 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007525 if ( atomic_available ) {
7526 retval = atomic_reduce_block;
7527 }
7528 } else {
7529 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7530 }
7531 } else if ( atomic_available ) {
7532 retval = atomic_reduce_block;
7533 }
7534 #else
7535 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007536 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007537
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007538 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH
Jim Cownie5e8470a2013-09-27 10:38:44 +00007539
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007540 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007541
Jim Cownie5e8470a2013-09-27 10:38:44 +00007542 // basic tuning
7543
7544 if( atomic_available ) {
7545 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7546 retval = atomic_reduce_block;
7547 }
7548 } // otherwise: use critical section
7549
7550 #elif KMP_OS_DARWIN
7551
Jim Cownie5e8470a2013-09-27 10:38:44 +00007552 if( atomic_available && ( num_vars <= 3 ) ) {
7553 retval = atomic_reduce_block;
7554 } else if( tree_available ) {
7555 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7556 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7557 }
7558 } // otherwise: use critical section
7559
7560 #else
7561 #error "Unknown or unsupported OS"
7562 #endif
7563
7564 #else
7565 #error "Unknown or unsupported architecture"
7566 #endif
7567
7568 }
7569
Jim Cownie5e8470a2013-09-27 10:38:44 +00007570 // KMP_FORCE_REDUCTION
7571
Andrey Churbanovec23a952015-08-17 10:12:12 +00007572 // If the team is serialized (team_size == 1), ignore the forced reduction
7573 // method and stay with the unsynchronized method (empty_reduce_block)
7574 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007575
7576 PACKED_REDUCTION_METHOD_T forced_retval;
7577
7578 int atomic_available, tree_available;
7579
7580 switch( ( forced_retval = __kmp_force_reduction_method ) )
7581 {
7582 case critical_reduce_block:
7583 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007584 break;
7585
7586 case atomic_reduce_block:
7587 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7588 KMP_ASSERT( atomic_available ); // atomic_available should be != 0
7589 break;
7590
7591 case tree_reduce_block:
7592 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7593 KMP_ASSERT( tree_available ); // tree_available should be != 0
7594 #if KMP_FAST_REDUCTION_BARRIER
7595 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7596 #endif
7597 break;
7598
7599 default:
7600 KMP_ASSERT( 0 ); // "unsupported method specified"
7601 }
7602
7603 retval = forced_retval;
7604 }
7605
7606 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7607
7608 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7609 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7610
7611 return ( retval );
7612}
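
/* Summary of the non-forced selection above, for the KMP_ARCH_X86_64 / PPC64 /
   AARCH64 branch:
       team_size == 1                               -> empty_reduce_block
       tree method generated && team_size > cutoff  -> TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER
           (cutoff is 4, or 8 when running on MIC)
       else, atomic method generated                -> atomic_reduce_block
       else                                         -> critical_reduce_block (default) */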
7613
7614// this function is for testing set/get/determine reduce method
7615kmp_int32
7616__kmp_get_reduce_method( void ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007617 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007618}
7619
7620/* ------------------------------------------------------------------------ */