/*
 * kmp_runtime.c -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif


#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread. */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */

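/* Note on the lookup strategy below: when __kmp_gtid_mode >= 3 the gtid lives in
   compiler thread-local data (__kmp_gtid); when it is >= 2 we use the keyed TLS
   slot (__kmp_gtid_get_specific()); otherwise we locate the caller by searching
   the registered threads' recorded stack ranges for the address of a local
   variable. The stack search is the slow path of the three. */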
int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t **other_threads;
    size_t stack_data;
    char *stack_addr;
    size_t stack_size;
    char *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
             parallel region, made it return KMP_GTID_DNE to force serial_initialize by
             caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
             __kmp_init_gtid for this to work. */

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    /*
        ATT: The code below is a source of potential bugs due to unsynchronized access to
        __kmp_threads array. For example:
        1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
        2. Current thread is suspended by OS.
        3. Another thread unregisters and finishes (debug versions of free() may fill memory
           with something like 0xEF).
        4. Current thread is resumed.
        5. Current thread reads junk from *thr.
        TODO: Fix it.
        --ln
    */

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated */
                /* stack size is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

    /* if we haven't been assigned a gtid, then return code */
    if( i<0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}

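/* Like __kmp_get_global_thread_id(), but if no gtid has been assigned yet this
   variant registers the calling thread as a new root (serial-initializing the
   library first if necessary), so it never returns KMP_GTID_DNE. */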
int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
        /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
    }

    KMP_DEBUG_ASSERT( gtid >= 0 );

    return gtid;
}

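/* Sanity check that the calling thread's [stack_beg, stack_end) range does not
   intersect the recorded stack range of any other registered thread; on overlap
   both ranges are printed (if the storage map is enabled) and the run time
   aborts with a fatal StackOverlap message. */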
/* caller must hold forkjoin_lock */
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                            (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                            "th_%d stack (overlapped)",
                            __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}


/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */

void
__kmp_infinite_loop( void )
{
    static int done = FALSE;

    while (! done) {
        KMP_YIELD( 1 );
    }
}

#define MAX_MESSAGE 512

void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];
    va_list ap;

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    int node;
    if(gtid >= 0) {
        if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
            if( __kmp_storage_map_verbose ) {
                node = __kmp_get_host_node(p1);
                if(node < 0)  /* doesn't work, so don't try this next time */
                    __kmp_storage_map_verbose = FALSE;
                else {
                    char *last;
                    int lastNode;
                    int localProc = __kmp_get_cpu_from_gtid(gtid);

                    p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
                    p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
                    if(localProc >= 0)
                        __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
                    else
                        __kmp_printf_no_lock(" GTID %d\n", gtid);
# if KMP_USE_PRCTL
/* The more elaborate format is disabled for now because of the prctl hanging bug. */
                    do {
                        last = p1;
                        lastNode = node;
                        /* This loop collates adjacent pages with the same host node. */
                        do {
                            (char*)p1 += PAGE_SIZE;
                        } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                        __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
                                             (char*)p1 - 1, lastNode);
                    } while(p1 <= p2);
# else
                    __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                                         (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
                    if(p1 < p2) {
                        __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                                             (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
                    }
# endif
                }
            }
        } else
            __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
    }
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}

void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}

void
__kmp_abort_process()
{

    // Later threads may stall here, but that's ok because abort() will kill them.
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }; // if

    if ( KMP_OS_WINDOWS ) {
        // Let other threads know of abnormal termination and prevent deadlock
        // if abort happened during library initialization or shutdown
        __kmp_global.g.g_abort = SIGABRT;

        /*
            On Windows* OS by default abort() causes a pop-up error box, which stalls nightly testing.
            Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
            works well, but this function is not available in VS7 (this is not a problem for DLL, but
            it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does
            not help, at least in some versions of MS C RTL.

            It seems the following sequence is the only way to simulate abort() and avoid the pop-up
            error box.
        */
        raise( SIGABRT );
        _exit( 3 );    // Just in case, if signal ignored, exit anyway.
    } else {
        abort();
    }; // if

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

} // __kmp_abort_process

void
__kmp_abort_thread( void )
{
    // TODO: Eliminate g_abort global variable and this function.
    // In case of abort just call abort(), it will kill all the threads.
    __kmp_infinite_loop();
} // __kmp_abort_thread

/* ------------------------------------------------------------------------ */

/*
 * Print out the storage map for the major kmp_info_t thread data structures
 * that are allocated together.
 */

static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
    #endif // KMP_FAST_REDUCTION_BARRIER
}

/*
 * Print out the storage map for the major kmp_team_t team data structures
 * that are allocated together.
 */

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
    #endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    // TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // The OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
    // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
    // However, in fact, some threads can still be alive here, although about to be terminated.
    // The threads in the array with ds_thread==0 are most suspicious.
    // Actually, it may not be safe to access the __kmp_threads[] at all.

    // TODO: does it make sense to check __kmp_roots[] ?

    // Let's check that there are no other alive threads registered with the OMP lib.
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Assume that I'm alone.

    // Now it is probably safe to check and reset locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
    #ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
    #endif // KMP_DEBUG
}

BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
    //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));

            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
                           __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
            {
                // lpReserved is used for telling the difference:
                //   lpReserved == NULL when FreeLibrary() was called,
                //   lpReserved != NULL when the process terminates.
                // When FreeLibrary() is called, worker threads remain alive.
                // So they will release the forkjoin lock by themselves.
                // When the process terminates, worker threads disappear triggering
                // the problem of an unreleased forkjoin lock as described below.

                // A worker thread can take the forkjoin lock.
                // The problem comes up if that worker thread becomes dead
                // before it releases the forkjoin lock.
                // The forkjoin lock remains taken, while the thread
                // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
                // will try to take the forkjoin lock and will always fail,
                // so that the application will never finish [normally].
                // This scenario is possible if __kmpc_end() has not been executed.
                // It looks like it's not a corner case, but common cases:
                // - the main function was compiled by an alternative compiler;
                // - the main function was compiled by icl but without /Qopenmp (application with plugins);
                // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
                // - alive foreign thread prevented __kmpc_end from doing cleanup.

                // This is a hack to work around the problem.
                // TODO: !!! to figure out something better.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));

            /* if we wanted to register new siblings all the time here call
             * __kmp_get_gtid(); */
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
                           __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */


/* ------------------------------------------------------------------------ */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
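/* The library mode is encoded in the low bit of __kmp_yield_init: an even
   initial yield count means KMP_LIBRARY=throughput, an odd one means
   turnaround. Only that bit is flipped here; the rest of the counter is kept. */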
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  // check whether KMP_LIBRARY=throughput (even init count)

    if (status) {
        __kmp_yield_init |= 1;  // throughput => turnaround (odd init count)
    }
    else {
        __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
    }

    return old_status;  // return previous setting of whether KMP_LIBRARY=throughput
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* __kmp_parallel_deo --
 * Wait until it's our turn.
 */
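/* Each thread spins until the team's ordered ticket (t_ordered.dt.t_value)
   equals its own tid, so ordered sections execute in thread-id order within
   the team. Serialized teams skip the wait entirely. */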
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_MB();
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
        KMP_MB();
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo --
 * Signal the next task.
 */
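/* Releases the ordered section by handing the ticket to the next thread,
   (tid + 1) % nproc, with memory flushes on both sides of the update. */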
void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        KMP_MB();       /* Flush all pending memory write invalidates. */

        /* use the tid of the next thread in this team */
        /* TODO: replace with general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB();       /* Flush all pending memory write invalidates. */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* The BARRIER for a SINGLE process section is always explicit */

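/* Election for the SINGLE construct: each thread bumps its private
   this_construct counter, and the winner is the thread whose compare-and-swap
   moves team->t.t_construct from the old value to the new one; everyone else
   sees status == 0 and skips the block. */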
int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the
           single block */
        /* TODO: Should this be acquire or release? */
        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);
        }
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        {   // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}


/*
 * determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller.
 */
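/* When dyn-var is true the request is trimmed according to
   __kmp_global.g.g_dynamic_mode (load balance, thread limit, or random), and
   in all cases the result is clamped against KMP_ALL_THREADS/OMP_THREAD_LIMIT
   and the capacity of the __kmp_threads array. */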
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads
#if OMP_40_ENABLED
                     , int enter_teams
#endif /* OMP_40_ENABLED */
)
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    //
    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    //
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                       : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));
        }
        else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                                master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                                master_tid, new_nthreads ));
            }
        }
    }
    else {
        KMP_ASSERT( 0 );
    }

    //
    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    //
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        //
        // If dyn-var is false, emit a 1-time warning.
        //
        if ( ! get__dynamic_2( parent_team, master_tid )
          && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
                        master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    //
    // Check if the threads array is large enough, or needs expanding.
    //
    // See comment in __kmp_register_root() about the adjustment if
    // __kmp_threads[0] == NULL.
    //
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > capacity ) {
        //
        // Expand the threads array.
        //
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            //
            // The threads array was not expanded enough.
            //
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            //
            // If dyn-var is false, emit a 1-time warning.
            //
            if ( ! get__dynamic_2( parent_team, master_tid )
              && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
                else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within critical section forkjoin */

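/* For a hot team (the reusable team kept per root, or per nesting level when
   KMP_NESTED_HOT_TEAMS is enabled) the workers are already in place, so only
   the master is (re)wired here; otherwise every worker slot is filled from the
   thread pool and its barrier state is aligned with the team's. */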
static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int i;
    int use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
    KMP_MB();

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) {  // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1; // index in array of hot teams
        if( master_th->th.th_teams_microtask ) { // are we inside the teams?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level; // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level; // level was not increased in teams construct for team_of_workers before the parallel
            }            // team->t.t_level will be increased inside parallel
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1; // the team is ready to use
            } else {
                use_hot_team = 0; // AC: threads are not allocated yet
                hot_teams[level].hot_team = team; // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        } else {
            use_hot_team = 0;
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                           __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                           __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                           team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                           team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;
#endif
            { // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }; // for b
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif

    }

    KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
//
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team structure,
// so we don't make changes unless they are needed.
//
inline static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        // There is no point looking at t_fp_control_saved here.
        // If it is TRUE, we still have to update the values if they are different from those we now have.
        // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
        // that the values in the team are the same as those we have.
        // So, this code achieves what we need whether or not t_fp_control_saved is true.
        // By checking whether the value needs updating we avoid unnecessary writes that would put the
        // cache-line into a written state, causing all threads in the team to have to read it again.
        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
        // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
        // So we must ensure it is correct.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    }
    else {
        // Similarly here. Don't write to this cache-line in the team structure unless we have to.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
    }
}

// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed in the team
        // during the parallel region that we are exiting.
        //
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration

/*
 * Run a parallel region that has been serialized, so runs only in a team of the single master thread.
 */
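/* The serial team is managed like a stack: the first entry installs (or
   allocates) th_serial_team with t_serialized = 1, and each further nested
   serialized region just increments t_serialized and pushes another dispatch
   buffer. */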
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    /* Skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        //
        // No proc_bind clause was specified, so use the current value
        // of proc-bind-var for this parallel region.
        //
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    //
    // Reset for next parallel region
    //
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used
             * TODO increase performance by making these locks more specific */
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                           ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                           proc_bind,
#endif
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );


            /* TODO the above breaks the requirement that if we run out of
             * resources, then we can still guarantee that serialized teams
             * are ok, since we may need to allocate a new one */
        } else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
           each serialized task represented by team->t.t_serialized? */
        copy_icvs(
            & this_thr->th.th_current_task->td_icvs,
            & this_thr->th.th_current_task->td_parent->td_icvs );

        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }

#if OMP_40_ENABLED
        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];
        }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
        serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
#endif
        this_thr->th.th_info.ds.ds_tid = 0;

        /* set thread cache values */
        this_thr->th.th_team_nproc = 1;
        this_thr->th.th_team_master = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl (serial_team);

        /* check if we need to allocate dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

        KMP_MB();

    } else {
        /* this serialized team is already being used,
         * that's fine, just add another nested level */
        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;
        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        /* allocate/push dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        {
            dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
            disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
            serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        KMP_MB();
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

}

/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
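/* Arguments: source location, global thread id of the forking master, the
   calling convention (fork_context_e: Intel, GNU, ...), the argument count,
   the outlined microtask and its invoker, and the varargs holding the shared
   arguments (passed by pointer on the listed 64-bit Linux targets as a
   workaround, see the tracker note below). */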
1375int
1376__kmp_fork_call(
1377 ident_t * loc,
1378 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001379 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001380 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001381#if OMPT_SUPPORT
1382 void *unwrapped_task,
1383#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001384 microtask_t microtask,
1385 launch_t invoker,
1386/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001387#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001388 va_list * ap
1389#else
1390 va_list ap
1391#endif
1392 )
1393{
1394 void **argv;
1395 int i;
1396 int master_tid;
1397 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001398 kmp_team_t *team;
1399 kmp_team_t *parent_team;
1400 kmp_info_t *master_th;
1401 kmp_root_t *root;
1402 int nthreads;
1403 int master_active;
1404 int master_set_numthreads;
1405 int level;
1406#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001407 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001408 int teams_level;
1409#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001410#if KMP_NESTED_HOT_TEAMS
1411 kmp_hot_team_ptr_t **p_hot_teams;
1412#endif
1413 { // KMP_TIME_BLOCK
Jonathan Peyton45be4502015-08-11 21:36:41 +00001414 KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
1415 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001416
1417 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001418 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
1419 /* Some systems prefer the stack for the root thread(s) to start with */
1420 /* some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001421 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001422 /* These 2 lines below are so this does not get optimized out */
1423 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1424 __kmp_stkpadding += (short)((kmp_int64)dummy);
1425 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001426
1427 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001428 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001429 if( ! TCR_4(__kmp_init_parallel) )
1430 __kmp_parallel_initialize();
1431
1432 /* setup current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001433 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1434 parent_team = master_th->th.th_team;
1435 master_tid = master_th->th.th_info.ds.ds_tid;
1436 master_this_cons = master_th->th.th_local.this_construct;
1437 root = master_th->th.th_root;
1438 master_active = root->r.r_active;
1439 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001440
1441#if OMPT_SUPPORT
1442 ompt_parallel_id_t ompt_parallel_id;
1443 ompt_task_id_t ompt_task_id;
1444 ompt_frame_t *ompt_frame;
1445 ompt_task_id_t my_task_id;
1446 ompt_parallel_id_t my_parallel_id;
1447
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001448 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001449 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1450 ompt_task_id = __ompt_get_task_id_internal(0);
1451 ompt_frame = __ompt_get_task_frame_internal(0);
1452 }
1453#endif
1454
Jim Cownie5e8470a2013-09-27 10:38:44 +00001455 // Nested level will be an index in the nested nthreads array
1456 level = parent_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001457 active_level = parent_team->t.t_active_level; // used to launch non-serialized teams even if nesting is not allowed
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001458#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001459 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1460#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001461#if KMP_NESTED_HOT_TEAMS
1462 p_hot_teams = &master_th->th.th_hot_teams;
1463 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1464 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1465 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1466 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1467 (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
1468 }
1469#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001470
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001471#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001472 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001473 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1474 int team_size = master_set_numthreads;
1475
1476 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1477 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001478 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001479 }
1480#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001481
Jim Cownie5e8470a2013-09-27 10:38:44 +00001482 master_th->th.th_ident = loc;
1483
1484#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001485 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001486 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
1487 // AC: This is the start of a parallel region nested inside a teams construct.
1488 // The team is actual (hot); all workers are ready at the fork barrier.
1489 // No lock is needed to initialize the team a bit, then release the workers.
1490 parent_team->t.t_ident = loc;
Jonathan Peyton7cf08d42016-06-16 18:47:38 +00001491 __kmp_alloc_argv_entries( argc, parent_team, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492 parent_team->t.t_argc = argc;
1493 argv = (void**)parent_team->t.t_argv;
1494 for( i=argc-1; i >= 0; --i )
1495/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001496#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001497 *argv++ = va_arg( *ap, void * );
1498#else
1499 *argv++ = va_arg( ap, void * );
1500#endif
1501 /* Increment our nested depth level, but do not increase the serialization */
1502 if ( parent_team == master_th->th.th_serial_team ) {
1503 // AC: we are in serialized parallel
1504 __kmpc_serialized_parallel(loc, gtid);
1505 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
1506 parent_team->t.t_serialized--; // AC: need this so that enquiry functions
1507 // work correctly; will restore at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001508
1509#if OMPT_SUPPORT
1510 void *dummy;
1511 void **exit_runtime_p;
1512
1513 ompt_lw_taskteam_t lw_taskteam;
1514
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001515 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001516 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1517 unwrapped_task, ompt_parallel_id);
1518 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1519 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1520
1521 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1522
1523#if OMPT_TRACE
1524 /* OMPT implicit task begin */
1525 my_task_id = lw_taskteam.ompt_task_info.task_id;
1526 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001527 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001528 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1529 my_parallel_id, my_task_id);
1530 }
1531#endif
1532
1533 /* OMPT state */
1534 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1535 } else {
1536 exit_runtime_p = &dummy;
1537 }
1538#endif
1539
Jonathan Peyton45be4502015-08-11 21:36:41 +00001540 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001541 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1542 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001543 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001544#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001545 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001546#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001547 );
1548 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001549
1550#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001551 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001552#if OMPT_TRACE
1553 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1554
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001555 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001556 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1557 ompt_parallel_id, ompt_task_id);
1558 }
1559
1560 __ompt_lw_taskteam_unlink(master_th);
1561 // reset/clear the task id only after unlinking the task
1562 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1563#endif
1564
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001565 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001566 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001567 ompt_parallel_id, ompt_task_id,
1568 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001569 }
1570 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1571 }
1572#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001573 return TRUE;
1574 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001575
Jim Cownie5e8470a2013-09-27 10:38:44 +00001576 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001577#if OMPT_SUPPORT
1578 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1579#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001580 parent_team->t.t_invoke = invoker;
1581 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1582 parent_team->t.t_active_level ++;
1583 parent_team->t.t_level ++;
1584
1585 /* Change number of threads in the team if requested */
1586 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001587 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001588 // AC: can only reduce the number of threads dynamically, cannot increase
1589 kmp_info_t **other_threads = parent_team->t.t_threads;
1590 parent_team->t.t_nproc = master_set_numthreads;
1591 for ( i = 0; i < master_set_numthreads; ++i ) {
1592 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1593 }
1594 // Keep extra threads hot in the team for possible next parallels
1595 }
1596 master_th->th.th_set_nproc = 0;
1597 }
1598
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001599#if USE_DEBUGGER
1600 if ( __kmp_debugging ) { // Let debugger override number of threads.
1601 int nth = __kmp_omp_num_threads( loc );
1602 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1603 master_set_numthreads = nth;
1604 }; // if
1605 }; // if
1606#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001607
1608 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1609 __kmp_internal_fork( loc, gtid, parent_team );
1610 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1611
1612 /* Invoke microtask for MASTER thread */
1613 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1614 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1615
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001616 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001617 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1618 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001619 if (! parent_team->t.t_invoke( gtid )) {
1620 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1621 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001622 }
1623 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1624 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1625 KMP_MB(); /* Flush all pending memory write invalidates. */
1626
1627 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1628
1629 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001630 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001631#endif /* OMP_40_ENABLED */
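/*
 * Hedged user-level illustration (not part of this file): the branch above handles a
 * parallel region closely nested inside a teams construct, where each team master
 * re-uses the already-forked (hot) team. Compiler lowering details are omitted, and
 * whether teams may appear outside of a target region depends on the compiler and
 * OpenMP version; the source below only shows the construct shape that reaches this
 * path.
 */
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
    #pragma omp teams num_teams(2) thread_limit(4)
    {
        /* This inner parallel is the "parallel closely nested in teams" case. */
        #pragma omp parallel num_threads(4)
        {
            printf("team %d, thread %d\n",
                   omp_get_team_num(), omp_get_thread_num());
        }
    }
    return 0;
}
#endif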
1632
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001633#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001634 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001635 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001636 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001637#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001638
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
1640 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001641 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001642#if OMP_40_ENABLED
1643 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1644#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001646 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001647
1648 // Check whether we need to take the forkjoin lock (no need for a serialized parallel outside of a teams construct).
1649 // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
1650 if (nthreads > 1) {
1651 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1652#if OMP_40_ENABLED
1653 && !enter_teams
1654#endif /* OMP_40_ENABLED */
1655 ) ) || ( __kmp_library == library_serial ) ) {
1656 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1657 gtid, nthreads ));
1658 nthreads = 1;
1659 }
1660 }
1661 if ( nthreads > 1 ) {
1662 /* determine how many new threads we can use */
1663 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1664
1665 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001666#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001667/* AC: If we execute teams from a parallel region (on host), then teams should be created,
1668 but each can only have 1 thread if nesting is disabled. If teams is called from a serial region,
1669 then teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001670 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001671#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001672 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001673 if ( nthreads == 1 ) {
1674 // Free the lock for single-thread execution here;
1675 // for multi-thread execution it will be freed later,
1676 // after the team of threads is created and initialized
1677 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1678 }
1679 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001680 }
1681 KMP_DEBUG_ASSERT( nthreads > 0 );
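/*
 * Condensed sketch of the thread-count decision above (invented helper, simplified
 * arguments; the real code additionally consults __kmp_reserve_threads() under the
 * forkjoin lock). It only restates the control flow for readability and is not used
 * anywhere.
 */
#if 0
static int toy_choose_nthreads(int active_level, int max_active_levels,
                               int set_numthreads, int nproc_icv,
                               int nested_allowed, int already_in_parallel,
                               int entering_teams, int library_is_serial) {
    if (active_level >= max_active_levels)
        return 1;                                   /* nesting depth exhausted  */
    int nthreads = set_numthreads ? set_numthreads  /* num_threads() clause     */
                                  : nproc_icv;      /* else the nthreads-var ICV */
    if (nthreads > 1 &&
        ((!nested_allowed && already_in_parallel && !entering_teams) ||
         library_is_serial))
        nthreads = 1;                               /* serialize this region    */
    return nthreads;
}
#endif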
1682
1683 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001684 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001685
Jim Cownie5e8470a2013-09-27 10:38:44 +00001686 /* create a serialized parallel region? */
1687 if ( nthreads == 1 ) {
1688 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001689#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001690 void * args[ argc ];
1691#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001692 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001693#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001694
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1696
1697 __kmpc_serialized_parallel(loc, gtid);
1698
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001699 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001700 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001701 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001702#if OMP_40_ENABLED
1703 if ( !ap ) {
1704 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001705 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001706 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001707
1708#if OMPT_SUPPORT
1709 void *dummy;
1710 void **exit_runtime_p;
1711
1712 ompt_lw_taskteam_t lw_taskteam;
1713
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001714 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001715 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1716 unwrapped_task, ompt_parallel_id);
1717 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1718 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1719
1720 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1721
1722#if OMPT_TRACE
1723 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001724 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001725 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1726 ompt_parallel_id, my_task_id);
1727 }
1728#endif
1729
1730 /* OMPT state */
1731 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1732 } else {
1733 exit_runtime_p = &dummy;
1734 }
1735#endif
1736
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001737 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001738 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1739 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001740 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1741#if OMPT_SUPPORT
1742 , exit_runtime_p
1743#endif
1744 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001745 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001746
1747#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001748 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001749 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1750
1751#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001752 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001753 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1754 ompt_parallel_id, ompt_task_id);
1755 }
1756#endif
1757
1758 __ompt_lw_taskteam_unlink(master_th);
1759 // reset/clear the task id only after unlinking the task
1760 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1761
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001762 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001763 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001764 ompt_parallel_id, ompt_task_id,
1765 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001766 }
1767 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1768 }
1769#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001770 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
1771 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1772 team = master_th->th.th_team;
1773 //team->t.t_pkfn = microtask;
1774 team->t.t_invoke = invoker;
1775 __kmp_alloc_argv_entries( argc, team, TRUE );
1776 team->t.t_argc = argc;
1777 argv = (void**) team->t.t_argv;
1778 if ( ap ) {
1779 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001780// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001781# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001782 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001783# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001784 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001785# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001786 } else {
1787 for( i=0; i < argc; ++i )
1788 // Get args from parent team for teams construct
1789 argv[i] = parent_team->t.t_argv[i];
1790 }
1791 // AC: revert change made in __kmpc_serialized_parallel()
1792 // because initial code in teams should have level=0
1793 team->t.t_level--;
1794 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001795 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001796 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1797 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001798 invoker(gtid);
1799 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001800 } else {
1801#endif /* OMP_40_ENABLED */
1802 argv = args;
1803 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001804// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001805#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001806 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001807#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001808 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001809#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001811
1812#if OMPT_SUPPORT
1813 void *dummy;
1814 void **exit_runtime_p;
1815
1816 ompt_lw_taskteam_t lw_taskteam;
1817
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001818 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001819 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1820 unwrapped_task, ompt_parallel_id);
1821 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1822 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1823
1824 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1825
1826#if OMPT_TRACE
1827 /* OMPT implicit task begin */
1828 my_task_id = lw_taskteam.ompt_task_info.task_id;
1829 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001830 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001831 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1832 my_parallel_id, my_task_id);
1833 }
1834#endif
1835
1836 /* OMPT state */
1837 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1838 } else {
1839 exit_runtime_p = &dummy;
1840 }
1841#endif
1842
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001843 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001844 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1845 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001846 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1847#if OMPT_SUPPORT
1848 , exit_runtime_p
1849#endif
1850 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001851 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001852
1853#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001854 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001855#if OMPT_TRACE
1856 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1857
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001858 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001859 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1860 my_parallel_id, my_task_id);
1861 }
1862#endif
1863
1864 __ompt_lw_taskteam_unlink(master_th);
1865 // reset/clear the task id only after unlinking the task
1866 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1867
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001868 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001869 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001870 ompt_parallel_id, ompt_task_id,
1871 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001872 }
1873 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1874 }
1875#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001876#if OMP_40_ENABLED
1877 }
1878#endif /* OMP_40_ENABLED */
1879 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001880 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001881#if OMPT_SUPPORT
1882 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1883 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1884 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1885 unwrapped_task, ompt_parallel_id);
1886
1887 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1888 lwt->ompt_task_info.frame.exit_runtime_frame = 0;
1889 __ompt_lw_taskteam_link(lwt, master_th);
1890#endif
1891
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001892 // we were called from GNU native code
1893 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1894 return FALSE;
1895 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001896 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001897 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001898 }
1899
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001901 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001902 KMP_MB();
1903 return FALSE;
1904 }
1905
Jim Cownie5e8470a2013-09-27 10:38:44 +00001906 // GEH: only modify the executing flag in the case when not serialized
1907 // serialized case is handled in kmpc_serialized_parallel
1908 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001909 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1910 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1912 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1913 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001914
1915#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001916 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917#endif /* OMP_40_ENABLED */
1918 {
1919 /* Increment our nested depth level */
1920 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1921 }
1922
Jim Cownie5e8470a2013-09-27 10:38:44 +00001923 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001924 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001925 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1926 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001927 }
1928 else {
1929 nthreads_icv = 0; // don't update
1930 }
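/*
 * Worked example (assumption: __kmp_nested_nth is populated from a comma-separated
 * OMP_NUM_THREADS list, e.g. OMP_NUM_THREADS=4,2). With used == 2 and nth = {4, 2},
 * a fork at level 0 finds level+1 == 1 < used, so nthreads_icv becomes 2 and the
 * nested region's nproc ICV is overridden; a fork at level 1 leaves nthreads_icv at 0
 * and the inherited ICV is kept. The snippet below merely restates that lookup with
 * invented names.
 */
#if 0
static int toy_nested_nth_lookup(const int *nth, int used, int level, int current_icv) {
    if ((level + 1 < used) && (nth[level + 1] != current_icv))
        return nth[level + 1];   /* override nproc for the nested level */
    return 0;                    /* 0 means: do not update the ICV      */
}
#endif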
1931
1932#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001934 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001935 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001936 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1937 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 }
1939 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001940 if (proc_bind == proc_bind_default) {
1941 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1942 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001944 /* else: The proc_bind policy was specified explicitly on parallel clause. This
1945 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001946 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001947 if ((level+1 < __kmp_nested_proc_bind.used)
1948 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1949 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001950 }
1951 }
1952
Jim Cownie5e8470a2013-09-27 10:38:44 +00001953 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001954 master_th->th.th_set_proc_bind = proc_bind_default;
1955#endif /* OMP_40_ENABLED */
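/*
 * Condensed restatement of the proc_bind selection above (illustrative helper with
 * invented names). Precedence: proc-bind-var == false disables binding outright;
 * otherwise an explicit proc_bind clause wins for this region only, and the nested
 * proc-bind list (OMP_PROC_BIND=spread,close style settings) may update the ICV
 * handed to the child threads.
 */
#if 0
typedef enum { TOY_BIND_DEFAULT, TOY_BIND_FALSE, TOY_BIND_MASTER,
               TOY_BIND_CLOSE, TOY_BIND_SPREAD } toy_bind_t;

static toy_bind_t toy_choose_proc_bind(toy_bind_t clause, toy_bind_t icv,
                                       const toy_bind_t *nested, int used, int level,
                                       toy_bind_t *child_icv /* out */) {
    *child_icv = TOY_BIND_DEFAULT;                 /* default: do not update  */
    if (icv == TOY_BIND_FALSE)
        return TOY_BIND_FALSE;                     /* binding disabled        */
    toy_bind_t bind = (clause == TOY_BIND_DEFAULT) ? icv : clause;
    if ((level + 1 < used) && (nested[level + 1] != icv))
        *child_icv = nested[level + 1];            /* value for the children  */
    return bind;
}
#endif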
1956
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001957 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001959 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001960#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001961 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001963 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001965 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001966 new_icvs.nproc = nthreads_icv;
1967 }
1968
1969#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 new_icvs.proc_bind = proc_bind_icv;
1972 }
1973#endif /* OMP_40_ENABLED */
1974
1975 /* allocate a new parallel team */
1976 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1977 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001978#if OMPT_SUPPORT
1979 ompt_parallel_id,
1980#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001981#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001982 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001983#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001984 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1985 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001986 /* allocate a new parallel team */
1987 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1988 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001989#if OMPT_SUPPORT
1990 ompt_parallel_id,
1991#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001993 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001995 &master_th->th.th_current_task->td_icvs, argc
1996 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001998 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999
2000 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002001 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2002 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2003 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2004 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2005 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002006#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002007 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002008#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002009 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010 // TODO: parent_team->t.t_level == INT_MAX ???
2011#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002012 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002013#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002014 int new_level = parent_team->t.t_level + 1;
2015 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2016 new_level = parent_team->t.t_active_level + 1;
2017 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002018#if OMP_40_ENABLED
2019 } else {
2020 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002021 int new_level = parent_team->t.t_level;
2022 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2023 new_level = parent_team->t.t_active_level;
2024 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002025 }
2026#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002027 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton6b560f02016-07-01 17:54:32 +00002028 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002029 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002031#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002032 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002033#endif
2034
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002035 // Update the floating point rounding in the team if required.
2036 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002037
2038 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002039 // Set master's task team to team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002040#if 0
2041 // Patch out an assertion that trips while the runtime seems to operate correctly.
2042 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002043 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002044#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002045 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002046 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002047 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002048
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002049 if ( active_level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002050 // Take a memo of master's task_state
2051 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2052 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002053 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2054 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002055 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002056 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002057 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2058 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2059 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002060 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2061 new_stack[i] = 0;
2062 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002063 old_stack = master_th->th.th_task_state_memo_stack;
2064 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002065 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002066 __kmp_free(old_stack);
2067 }
2068 // Store master's task_state on stack
2069 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2070 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002071#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002072 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
Jonathan Peyton54127982015-11-04 21:37:48 +00002073 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2074 }
2075 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002076#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002077 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002078#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002079 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002080#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002081 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002082#if !KMP_NESTED_HOT_TEAMS
2083 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2084#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002085 }
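/*
 * Stand-alone sketch of the task-state bookkeeping above: the master's th_task_state
 * is pushed onto a byte stack that doubles in capacity when full, so that it can be
 * restored at join time. Names below are invented, the stack is assumed to start with
 * a non-zero capacity (as the runtime arranges elsewhere), and the real code keeps
 * this state in kmp_info_t and uses __kmp_allocate/__kmp_free.
 */
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct {
    unsigned char *stack;
    unsigned       top;
    unsigned       size;   /* assumed > 0 at creation time */
} toy_state_stack_t;

static void toy_push_state(toy_state_stack_t *s, unsigned char state) {
    if (s->top >= s->size) {                       /* grow by doubling          */
        unsigned new_size = 2 * s->size;
        unsigned char *new_stack = (unsigned char *)calloc(new_size, 1);
        memcpy(new_stack, s->stack, s->size);      /* copy the old entries      */
        free(s->stack);
        s->stack = new_stack;
        s->size  = new_size;
    }
    s->stack[s->top++] = state;                    /* store state, then push    */
}

static unsigned char toy_pop_state(toy_state_stack_t *s) {
    return s->stack[--s->top];                     /* restore state at join     */
}
#endif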
Jim Cownie5e8470a2013-09-27 10:38:44 +00002086
2087 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2088 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2089 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2090 ( team->t.t_master_tid == 0 &&
2091 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2092 KMP_MB();
2093
2094 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002095 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002096#if OMP_40_ENABLED
2097 if ( ap ) {
2098#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002099 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002100// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002101#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002102 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002103#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002104 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002105#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002106 KMP_CHECK_UPDATE(*argv, new_argv);
2107 argv++;
2108 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002109#if OMP_40_ENABLED
2110 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002111 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002112 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002113 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2114 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002115 }
2116#endif /* OMP_40_ENABLED */
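/*
 * Minimal sketch of the argument-marshalling pattern above: the variadic parallel
 * arguments are drained from a va_list into the team's argv array (the real code
 * additionally carries the va_list by pointer on some ABIs, per the tracker #96
 * workaround). Helper names are invented for illustration.
 */
#if 0
#include <stdarg.h>

static void toy_copy_args(void **argv, int argc, va_list ap) {
    int i;
    for (i = argc - 1; i >= 0; --i)    /* same reverse order as above */
        *argv++ = va_arg(ap, void *);
}

static void toy_fork_shim(void **team_argv, int argc, ...) {
    va_list ap;
    va_start(ap, argc);
    toy_copy_args(team_argv, argc, ap);
    va_end(ap);
}
#endif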
2117
2118 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002119 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002120 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2121 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002122
2123 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002124 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002125
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002126#if OMPT_SUPPORT
2127 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2128#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002129
2130 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2131
Jim Cownie5e8470a2013-09-27 10:38:44 +00002132#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002133 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002135 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002136# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002137 ) {
2138#if USE_ITT_NOTIFY
2139 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2140 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002141 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002142 kmp_uint64 tmp_time = 0;
2143 if ( __itt_get_timestamp_ptr )
2144 tmp_time = __itt_get_timestamp();
2145 // Internal fork - report frame begin
2146 master_th->th.th_frame_time = tmp_time;
2147 if ( __kmp_forkjoin_frames_mode == 3 )
2148 team->t.t_region_time = tmp_time;
2149 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2150#endif /* USE_ITT_NOTIFY */
2151 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2152 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2153 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002154 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2155 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002156 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002157#endif /* USE_ITT_BUILD */
2158
2159 /* now go on and do the work */
2160 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2161 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002162 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2163 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002164
2165#if USE_ITT_BUILD
2166 if ( __itt_stack_caller_create_ptr ) {
2167 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2168 }
2169#endif /* USE_ITT_BUILD */
2170
2171#if OMP_40_ENABLED
2172 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
2173#endif /* OMP_40_ENABLED */
2174 {
2175 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002176 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2177 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002178 }
2179
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002180 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002181 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2182 return TRUE;
2183 }
2184
2185 /* Invoke microtask for MASTER thread */
2186 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2187 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002188 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002189
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002190 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002191 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2192 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00002193 // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002194 if (! team->t.t_invoke( gtid )) {
2195 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2196 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002197 }
2198 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2199 gtid, team->t.t_id, team->t.t_pkfn ) );
2200 KMP_MB(); /* Flush all pending memory write invalidates. */
2201
2202 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2203
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002204#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002205 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002206 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2207 }
2208#endif
2209
Jim Cownie5e8470a2013-09-27 10:38:44 +00002210 return TRUE;
2211}
2212
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002213#if OMPT_SUPPORT
2214static inline void
2215__kmp_join_restore_state(
2216 kmp_info_t *thread,
2217 kmp_team_t *team)
2218{
2219 // restore state outside the region
2220 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2221 ompt_state_work_serial : ompt_state_work_parallel);
2222}
2223
2224static inline void
2225__kmp_join_ompt(
2226 kmp_info_t *thread,
2227 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002228 ompt_parallel_id_t parallel_id,
2229 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002230{
2231 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
2232 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2233 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002234 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002235 }
2236
2237 __kmp_join_restore_state(thread,team);
2238}
2239#endif
2240
Jim Cownie5e8470a2013-09-27 10:38:44 +00002241void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002242__kmp_join_call(ident_t *loc, int gtid
2243#if OMPT_SUPPORT
2244 , enum fork_context_e fork_context
2245#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002246#if OMP_40_ENABLED
2247 , int exit_teams
2248#endif /* OMP_40_ENABLED */
2249)
2250{
Jonathan Peyton45be4502015-08-11 21:36:41 +00002251 KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002252 kmp_team_t *team;
2253 kmp_team_t *parent_team;
2254 kmp_info_t *master_th;
2255 kmp_root_t *root;
2256 int master_active;
2257 int i;
2258
2259 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2260
2261 /* setup current data */
2262 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002263 root = master_th->th.th_root;
2264 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002265 parent_team = team->t.t_parent;
2266
2267 master_th->th.th_ident = loc;
2268
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002269#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002270 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002271 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2272 }
2273#endif
2274
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002275#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00002276 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2277 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2278 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002279 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2280 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002281 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002282#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002283
2284 if( team->t.t_serialized ) {
2285#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002286 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002287 // We are in teams construct
2288 int level = team->t.t_level;
2289 int tlevel = master_th->th.th_teams_level;
2290 if ( level == tlevel ) {
2291 // AC: we haven't incremented it earlier at start of teams construct,
2292 // so do it here - at the end of teams construct
2293 team->t.t_level++;
2294 } else if ( level == tlevel + 1 ) {
2295 // AC: we are exiting parallel inside teams, need to increment serialization
2296 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2297 team->t.t_serialized++;
2298 }
2299 }
2300#endif /* OMP_40_ENABLED */
2301 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002302
2303#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002304 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002305 __kmp_join_restore_state(master_th, parent_team);
2306 }
2307#endif
2308
Jim Cownie5e8470a2013-09-27 10:38:44 +00002309 return;
2310 }
2311
2312 master_active = team->t.t_master_active;
2313
2314#if OMP_40_ENABLED
2315 if (!exit_teams)
2316#endif /* OMP_40_ENABLED */
2317 {
2318 // AC: No barrier for internal teams at exit from the teams construct.
2319 // But there is a barrier for the external team (league).
2320 __kmp_internal_join( loc, gtid, team );
2321 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002322#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002323 else {
2324 master_th->th.th_task_state = 0; // AC: no tasking in teams (outside of any parallel)
2325 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002326#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002327
Jim Cownie5e8470a2013-09-27 10:38:44 +00002328 KMP_MB();
2329
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002330#if OMPT_SUPPORT
2331 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2332#endif
2333
Jim Cownie5e8470a2013-09-27 10:38:44 +00002334#if USE_ITT_BUILD
2335 if ( __itt_stack_caller_create_ptr ) {
2336 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2337 }
2338
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002339 // Mark end of "parallel" region for VTune.
2340 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002341# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002342 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002343# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002344 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002345 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002346 // only one notification scheme (either "submit" or "forking/joined", not both)
2347 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2348 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2349 0, loc, master_th->th.th_team_nproc, 1 );
2350 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2351 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2352 __kmp_itt_region_joined( gtid );
2353 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354#endif /* USE_ITT_BUILD */
2355
2356#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002357 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002358 !exit_teams &&
2359 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2360 team->t.t_level == master_th->th.th_teams_level + 1 ) {
2361 // AC: We need to leave the team structure intact at the end
2362 // of parallel inside the teams construct, so that the same (hot)
2363 // team works at the next parallel; only adjust nesting levels
2364
2365 /* Decrement our nested depth level */
2366 team->t.t_level --;
2367 team->t.t_active_level --;
2368 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2369
2370 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002371 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002372 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002373 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002374 kmp_info_t **other_threads = team->t.t_threads;
2375 team->t.t_nproc = new_num;
2376 for ( i = 0; i < old_num; ++i ) {
2377 other_threads[i]->th.th_team_nproc = new_num;
2378 }
2379 // Adjust states of the unused threads of the team
2380 for ( i = old_num; i < new_num; ++i ) {
2381 // Re-initialize thread's barrier data.
2382 int b;
2383 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002384 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002385 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002386 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002387#if USE_DEBUGGER
2388 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2389#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002390 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002391 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2392 // Synchronize thread's task state
2393 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2394 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002395 }
2396 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002397
2398#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002399 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002400 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002401 }
2402#endif
2403
Jim Cownie5e8470a2013-09-27 10:38:44 +00002404 return;
2405 }
2406#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002407
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002408 /* do cleanup and restore the parent team */
2409 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2410 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2411
2412 master_th->th.th_dispatch =
2413 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002414
2415 /* jc: The following lock has instructions with REL and ACQ semantics,
2416 separating the parallel user code called in this parallel region
2417 from the serial user code called after this function returns.
2418 */
2419 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2420
2421#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002422 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002423#endif /* OMP_40_ENABLED */
2424 {
2425 /* Decrement our nested depth level */
2426 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2427 }
2428 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2429
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002430#if OMPT_SUPPORT && OMPT_TRACE
2431 if (ompt_enabled) {
2432 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2433 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2434 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2435 parallel_id, task_info->task_id);
2436 }
2437 task_info->frame.exit_runtime_frame = 0;
2438 task_info->task_id = 0;
2439 }
2440#endif
2441
Jim Cownie5e8470a2013-09-27 10:38:44 +00002442 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2443 0, master_th, team ) );
2444 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002445
Alp Toker98758b02014-03-02 04:12:06 +00002446#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002447 //
2448 // Restore master thread's partition.
2449 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002450 master_th->th.th_first_place = team->t.t_first_place;
2451 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002452#endif /* OMP_40_ENABLED */
2453
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002454 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002455
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002456 if ( root->r.r_active != master_active )
2457 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002458
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002459 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002460
2461 /* this race was fun to find. make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002462 * region otherwise assertions may fail occasionally since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002463 * may be reallocated and the hierarchy appears inconsistent. it is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002464 * actually safe to run and won't cause any bugs, but will cause those
Jim Cownie5e8470a2013-09-27 10:38:44 +00002465 * assertion failures. it's only one deref&assign so might as well put this
2466 * in the critical region */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002467 master_th->th.th_team = parent_team;
2468 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2469 master_th->th.th_team_master = parent_team->t.t_threads[0];
2470 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002471
2472 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002473 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002474 parent_team != master_th->th.th_serial_team &&
2475 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002476 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2477 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002478 }
2479
Jim Cownie5e8470a2013-09-27 10:38:44 +00002480 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002481 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2482 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2483 // Remember master's state if we re-use this nested hot team
2484 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002485 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002486 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002487 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002488 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002489 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002490 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002491 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002492 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002493 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002494
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002495 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2496 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2497 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002498
2499 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2500
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002501#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002502 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002503 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002504 }
2505#endif
2506
Jim Cownie5e8470a2013-09-27 10:38:44 +00002507 KMP_MB();
2508 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2509}
2510
2511/* ------------------------------------------------------------------------ */
2512/* ------------------------------------------------------------------------ */
2513
2514/* Check whether we should push an internal control record onto the
2515 serial team stack. If so, do it. */
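/* A record is pushed only while the thread is executing on its serial team with
   t_serialized > 1, and at most once per serialization level: the push happens when the
   stack is empty or when its top entry was recorded at a different serial_nesting_level
   than the current t_serialized. */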
2516void
2517__kmp_save_internal_controls ( kmp_info_t * thread )
2518{
2519
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002520 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002521 return;
2522 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002523 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002524 int push = 0;
2525
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002526 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002527 push = 1;
2528 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002529 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2530 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002531 push = 1;
2532 }
2533 }
2534 if (push) { /* push a record on the serial team's stack */
2535 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2536
Jim Cownie5e8470a2013-09-27 10:38:44 +00002537 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538
2539 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2540
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002541 control->next = thread->th.th_team->t.t_control_stack_top;
2542 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002543 }
2544 }
2545}
2546
2547/* Changes set_nproc */
2548void
2549__kmp_set_num_threads( int new_nth, int gtid )
2550{
2551 kmp_info_t *thread;
2552 kmp_root_t *root;
2553
2554 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2555 KMP_DEBUG_ASSERT( __kmp_init_serial );
2556
2557 if (new_nth < 1)
2558 new_nth = 1;
2559 else if (new_nth > __kmp_max_nth)
2560 new_nth = __kmp_max_nth;
2561
Jonathan Peyton45be4502015-08-11 21:36:41 +00002562 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002563 thread = __kmp_threads[gtid];
2564
2565 __kmp_save_internal_controls( thread );
2566
2567 set__nproc( thread, new_nth );
2568
2569 //
2570 // If this omp_set_num_threads() call will cause the hot team size to be
2571 // reduced (in the absence of a num_threads clause), then reduce it now,
2572 // rather than waiting for the next parallel region.
2573 //
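    //
    // For example (illustrative), from serial user code:
    //
    //     omp_set_num_threads(8);
    //     #pragma omp parallel      // hot team grows to 8 threads
    //     { /* ... */ }
    //     omp_set_num_threads(2);   // the shrink below happens now: workers 2..7 are released
    //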
2574 root = thread->th.th_root;
2575 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002576 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2577#if KMP_NESTED_HOT_TEAMS
2578 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2579#endif
2580 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002581 kmp_team_t *hot_team = root->r.r_hot_team;
2582 int f;
2583
2584 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2585
Jim Cownie5e8470a2013-09-27 10:38:44 +00002586 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002587 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2588 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002589 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2590 // When decreasing team size, threads no longer in the team should unref task team.
2591 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2592 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002593 __kmp_free_thread( hot_team->t.t_threads[f] );
2594 hot_team->t.t_threads[f] = NULL;
2595 }
2596 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002597#if KMP_NESTED_HOT_TEAMS
2598 if( thread->th.th_hot_teams ) {
2599 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2600 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2601 }
2602#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002603
Jim Cownie5e8470a2013-09-27 10:38:44 +00002604 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2605
2606 //
2607 // Update the t_nproc field in the threads that are still active.
2608 //
2609 for( f=0 ; f < new_nth; f++ ) {
2610 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2611 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2612 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613 // Special flag to mark that the hot team size was changed via omp_set_num_threads()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002614 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002615 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002616}
2617
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618/* Changes max_active_levels */
2619void
2620__kmp_set_max_active_levels( int gtid, int max_active_levels )
2621{
2622 kmp_info_t *thread;
2623
2624 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2625 KMP_DEBUG_ASSERT( __kmp_init_serial );
2626
2627 // validate max_active_levels
2628 if( max_active_levels < 0 ) {
2629 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2630 // We ignore this call if the user has specified a negative value.
2631 // The current setting won't be changed. The last valid setting will be used.
2632 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2633 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2634 return;
2635 }
2636 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2637 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2638 // We allow a zero value. (implementation defined behavior)
2639 } else {
2640 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2641 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2642 // Current upper limit is MAX_INT. (implementation defined behavior)
2643 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
2644 // Actually, the flow should never reach this point while the upper limit is MAX_INT.
2645 }
2646 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2647
2648 thread = __kmp_threads[ gtid ];
2649
2650 __kmp_save_internal_controls( thread );
2651
2652 set__max_active_levels( thread, max_active_levels );
2653
2654}
2655
2656/* Gets max_active_levels */
2657int
2658__kmp_get_max_active_levels( int gtid )
2659{
2660 kmp_info_t *thread;
2661
2662 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2663 KMP_DEBUG_ASSERT( __kmp_init_serial );
2664
2665 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002666 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002667 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002668 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2669 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002670}
2671
2672/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2673void
2674__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2675{
2676 kmp_info_t *thread;
2677// kmp_team_t *team;
2678
2679 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2680 KMP_DEBUG_ASSERT( __kmp_init_serial );
2681
2682 // Check if the kind parameter is valid, correct if needed.
2683 // Valid parameters should fit in one of two intervals - standard or extended:
2684 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2685 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
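    // In other words, kind values in 1..4 (standard) or 101..102 (extended) are accepted;
    // anything else (0, 5..100, >= 103) triggers the warning below and falls back to
    // kmp_sched_default with the chunk value ignored.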
2686 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2687 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2688 {
2689 // TODO: Hint needs attention in case we change the default schedule.
2690 __kmp_msg(
2691 kmp_ms_warning,
2692 KMP_MSG( ScheduleKindOutOfRange, kind ),
2693 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2694 __kmp_msg_null
2695 );
2696 kind = kmp_sched_default;
2697 chunk = 0; // ignore chunk value in case of bad kind
2698 }
2699
2700 thread = __kmp_threads[ gtid ];
2701
2702 __kmp_save_internal_controls( thread );
2703
2704 if ( kind < kmp_sched_upper_std ) {
2705 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2706 // differentiate static chunked vs. unchunked:
2707 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002708 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002709 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002710 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002711 }
2712 } else {
2713 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002714 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002715 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2716 }
2717 if ( kind == kmp_sched_auto ) {
2718 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002719 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002721 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002722 }
2723}
2724
2725/* Gets def_sched_var ICV values */
2726void
2727__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2728{
2729 kmp_info_t *thread;
2730 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731
2732 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2733 KMP_DEBUG_ASSERT( __kmp_init_serial );
2734
2735 thread = __kmp_threads[ gtid ];
2736
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002737 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002738
2739 switch ( th_type ) {
2740 case kmp_sch_static:
2741 case kmp_sch_static_greedy:
2742 case kmp_sch_static_balanced:
2743 *kind = kmp_sched_static;
2744 *chunk = 0; // chunk was not set; indicate this with a zero value
2745 return;
2746 case kmp_sch_static_chunked:
2747 *kind = kmp_sched_static;
2748 break;
2749 case kmp_sch_dynamic_chunked:
2750 *kind = kmp_sched_dynamic;
2751 break;
2752 case kmp_sch_guided_chunked:
2753 case kmp_sch_guided_iterative_chunked:
2754 case kmp_sch_guided_analytical_chunked:
2755 *kind = kmp_sched_guided;
2756 break;
2757 case kmp_sch_auto:
2758 *kind = kmp_sched_auto;
2759 break;
2760 case kmp_sch_trapezoidal:
2761 *kind = kmp_sched_trapezoidal;
2762 break;
2763/*
2764 case kmp_sch_static_steal:
2765 *kind = kmp_sched_static_steal;
2766 break;
2767*/
2768 default:
2769 KMP_FATAL( UnknownSchedulingType, th_type );
2770 }
2771
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002772 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002773}
2774
2775int
2776__kmp_get_ancestor_thread_num( int gtid, int level ) {
2777
2778 int ii, dd;
2779 kmp_team_t *team;
2780 kmp_info_t *thr;
2781
2782 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2783 KMP_DEBUG_ASSERT( __kmp_init_serial );
2784
2785 // validate level
2786 if( level == 0 ) return 0;
2787 if( level < 0 ) return -1;
2788 thr = __kmp_threads[ gtid ];
2789 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002790 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002791 if( level > ii ) return -1;
2792
2793#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002794 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002795 // AC: we are in a teams region where multiple nested teams have the same level
2796 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2797 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2798 KMP_DEBUG_ASSERT( ii >= tlevel );
2799 // AC: As we need to pass by the teams league, we need to artificially increase ii
2800 if ( ii == tlevel ) {
2801 ii += 2; // three teams have same level
2802 } else {
2803 ii ++; // two teams have same level
2804 }
2805 }
2806 }
2807#endif
2808
2809 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2810
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002811 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002812 level++;
2813 while( ii > level )
2814 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002815 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002816 {
2817 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002818 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002819 team = team->t.t_parent;
2820 continue;
2821 }
2822 if( ii > level ) {
2823 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002824 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002825 ii--;
2826 }
2827 }
2828
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002829 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002830}
2831
2832int
2833__kmp_get_team_size( int gtid, int level ) {
2834
2835 int ii, dd;
2836 kmp_team_t *team;
2837 kmp_info_t *thr;
2838
2839 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2840 KMP_DEBUG_ASSERT( __kmp_init_serial );
2841
2842 // validate level
2843 if( level == 0 ) return 1;
2844 if( level < 0 ) return -1;
2845 thr = __kmp_threads[ gtid ];
2846 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002847 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002848 if( level > ii ) return -1;
2849
2850#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002851 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002852 // AC: we are in a teams region where multiple nested teams have the same level
2853 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2854 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2855 KMP_DEBUG_ASSERT( ii >= tlevel );
2856 // AC: As we need to pass by the teams league, we need to artificially increase ii
2857 if ( ii == tlevel ) {
2858 ii += 2; // three teams have same level
2859 } else {
2860 ii ++; // two teams have same level
2861 }
2862 }
2863 }
2864#endif
2865
2866 while( ii > level )
2867 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002868 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002869 {
2870 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002871 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002872 team = team->t.t_parent;
2873 continue;
2874 }
2875 if( ii > level ) {
2876 team = team->t.t_parent;
2877 ii--;
2878 }
2879 }
2880
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002881 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002882}
2883
Jim Cownie5e8470a2013-09-27 10:38:44 +00002884kmp_r_sched_t
2885__kmp_get_schedule_global() {
2886// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
2887// may be changed by kmp_set_defaults independently, so one can get the updated schedule here.
2888
2889 kmp_r_sched_t r_sched;
2890
2891 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2892 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2893 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2894 if ( __kmp_sched == kmp_sch_static ) {
2895 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2896 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2897 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2898 } else {
2899 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2900 }
2901
2902 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2903 r_sched.chunk = KMP_DEFAULT_CHUNK;
2904 } else {
2905 r_sched.chunk = __kmp_chunk;
2906 }
2907
2908 return r_sched;
2909}
2910
2911/* ------------------------------------------------------------------------ */
2912/* ------------------------------------------------------------------------ */
2913
2914
2915/*
2916 * Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2917 * at least argc number of *t_argv entries for the requested team.
2918 */
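/* Sizing note: if argc fits in KMP_INLINE_ARGV_ENTRIES, the argv pointers are kept in spare
   space inside the team structure itself; otherwise a heap array of
   max( KMP_MIN_MALLOC_ARGV_ENTRIES, 2 * argc ) entries is page-allocated, after freeing any
   previously heap-allocated argv. */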
2919static void
2920__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2921{
2922
2923 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002924 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002925
2926 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2927 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002928 /* if previously allocated heap space for args, free them */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002929 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2930 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002931
2932 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2933 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002934 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002935 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2936 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002937 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002938 if ( __kmp_storage_map ) {
2939 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2940 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2941 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2942 "team_%d.t_inline_argv",
2943 team->t.t_id );
2944 }
2945 } else {
2946 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002947 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002948 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2949 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2950 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002951 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002952 if ( __kmp_storage_map ) {
2953 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2954 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2955 team->t.t_id );
2956 }
2957 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002958 }
2959}
2960
2961static void
2962__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2963{
2964 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002965 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002966 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2967 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002968 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002969 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002970 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002971 team->t.t_max_nproc = max_nth;
2972
2973 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002974 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002975 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002976#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00002977 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2978#endif
2979 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002980}
2981
2982static void
2983__kmp_free_team_arrays(kmp_team_t *team) {
2984 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2985 int i;
2986 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2987 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2988 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2989 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2990 }; // if
2991 }; // for
2992 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00002993 __kmp_free(team->t.t_disp_buffer);
2994 __kmp_free(team->t.t_dispatch);
2995 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002996 team->t.t_threads = NULL;
2997 team->t.t_disp_buffer = NULL;
2998 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002999 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003000}
3001
3002static void
3003__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3004 kmp_info_t **oldThreads = team->t.t_threads;
3005
Jonathan Peytona58563d2016-03-29 20:05:27 +00003006 __kmp_free(team->t.t_disp_buffer);
3007 __kmp_free(team->t.t_dispatch);
3008 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003009 __kmp_allocate_team_arrays(team, max_nth);
3010
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003011 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003012
3013 __kmp_free(oldThreads);
3014}
3015
3016static kmp_internal_control_t
3017__kmp_get_global_icvs( void ) {
3018
Jim Cownie5e8470a2013-09-27 10:38:44 +00003019 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003020
3021#if OMP_40_ENABLED
3022 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3023#endif /* OMP_40_ENABLED */
3024
3025 kmp_internal_control_t g_icvs = {
3026 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003027 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3028 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3029 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003030 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
3031 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003032 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3033 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003034 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3035 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003036#if OMP_40_ENABLED
3037 __kmp_nested_proc_bind.bind_types[0],
3038#endif /* OMP_40_ENABLED */
3039 NULL //struct kmp_internal_control *next;
3040 };
3041
3042 return g_icvs;
3043}
3044
3045static kmp_internal_control_t
3046__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3047
Jim Cownie5e8470a2013-09-27 10:38:44 +00003048 kmp_internal_control_t gx_icvs;
3049 gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
3050 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3051 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003052
3053 return gx_icvs;
3054}
3055
3056static void
3057__kmp_initialize_root( kmp_root_t *root )
3058{
3059 int f;
3060 kmp_team_t *root_team;
3061 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003062 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003063 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3064 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003065 KMP_DEBUG_ASSERT( root );
3066 KMP_ASSERT( ! root->r.r_begin );
3067
3068 /* setup the root state structure */
3069 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003070 root->r.r_begin = FALSE;
3071 root->r.r_active = FALSE;
3072 root->r.r_in_parallel = 0;
3073 root->r.r_blocktime = __kmp_dflt_blocktime;
3074 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003075
3076 /* setup the root team for this task */
3077 /* allocate the root team structure */
3078 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003079
Jim Cownie5e8470a2013-09-27 10:38:44 +00003080 root_team =
3081 __kmp_allocate_team(
3082 root,
3083 1, // new_nproc
3084 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003085#if OMPT_SUPPORT
3086 0, // root parallel id
3087#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003088#if OMP_40_ENABLED
3089 __kmp_nested_proc_bind.bind_types[0],
3090#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003091 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003092 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003093 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003094 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003095#if USE_DEBUGGER
3096 // Non-NULL value should be assigned to make the debugger display the root team.
3097 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3098#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003099
3100 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3101
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003102 root->r.r_root_team = root_team;
3103 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003104
3105 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003106 root_team->t.t_threads[0] = NULL;
3107 root_team->t.t_nproc = 1;
3108 root_team->t.t_serialized = 1;
3109 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3110 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3111 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003112 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3113 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3114
3115 /* setup the hot team for this task */
3116 /* allocate the hot team structure */
3117 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003118
Jim Cownie5e8470a2013-09-27 10:38:44 +00003119 hot_team =
3120 __kmp_allocate_team(
3121 root,
3122 1, // new_nproc
3123 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003124#if OMPT_SUPPORT
3125 0, // root parallel id
3126#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003127#if OMP_40_ENABLED
3128 __kmp_nested_proc_bind.bind_types[0],
3129#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003130 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003131 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003132 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003133 );
3134 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3135
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003136 root->r.r_hot_team = hot_team;
3137 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003138
3139 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003140 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003141
3142 /* initialize hot team */
3143 hot_team_max_nth = hot_team->t.t_max_nproc;
3144 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003145 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003146 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003147 hot_team->t.t_nproc = 1;
3148 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3149 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3150 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003151 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003152}
3153
3154#ifdef KMP_DEBUG
3155
3156
3157typedef struct kmp_team_list_item {
3158 kmp_team_p const * entry;
3159 struct kmp_team_list_item * next;
3160} kmp_team_list_item_t;
3161typedef kmp_team_list_item_t * kmp_team_list_t;
3162
3163
3164static void
3165__kmp_print_structure_team_accum( // Add team to list of teams.
3166 kmp_team_list_t list, // List of teams.
3167 kmp_team_p const * team // Team to add.
3168) {
3169
3170 // List must terminate with item where both entry and next are NULL.
3171 // Team is added to the list only once.
3172 // List is sorted in ascending order by team id.
3173 // Team id is *not* a key.
3174
3175 kmp_team_list_t l;
3176
3177 KMP_DEBUG_ASSERT( list != NULL );
3178 if ( team == NULL ) {
3179 return;
3180 }; // if
3181
3182 __kmp_print_structure_team_accum( list, team->t.t_parent );
3183 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3184
3185 // Search list for the team.
3186 l = list;
3187 while ( l->next != NULL && l->entry != team ) {
3188 l = l->next;
3189 }; // while
3190 if ( l->next != NULL ) {
3191 return; // Team has been added before, exit.
3192 }; // if
3193
3194 // Team is not found. Search list again for insertion point.
3195 l = list;
3196 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3197 l = l->next;
3198 }; // while
3199
3200 // Insert team.
3201 {
3202 kmp_team_list_item_t * item =
3203 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3204 * item = * l;
3205 l->entry = team;
3206 l->next = item;
3207 }
3208
3209}
3210
3211static void
3212__kmp_print_structure_team(
3213 char const * title,
3214 kmp_team_p const * team
3215
3216) {
3217 __kmp_printf( "%s", title );
3218 if ( team != NULL ) {
3219 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3220 } else {
3221 __kmp_printf( " - (nil)\n" );
3222 }; // if
3223}
3224
3225static void
3226__kmp_print_structure_thread(
3227 char const * title,
3228 kmp_info_p const * thread
3229
3230) {
3231 __kmp_printf( "%s", title );
3232 if ( thread != NULL ) {
3233 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3234 } else {
3235 __kmp_printf( " - (nil)\n" );
3236 }; // if
3237}
3238
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003239void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003240__kmp_print_structure(
3241 void
3242) {
3243
3244 kmp_team_list_t list;
3245
3246 // Initialize list of teams.
3247 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3248 list->entry = NULL;
3249 list->next = NULL;
3250
3251 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3252 {
3253 int gtid;
3254 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3255 __kmp_printf( "%2d", gtid );
3256 if ( __kmp_threads != NULL ) {
3257 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3258 }; // if
3259 if ( __kmp_root != NULL ) {
3260 __kmp_printf( " %p", __kmp_root[ gtid ] );
3261 }; // if
3262 __kmp_printf( "\n" );
3263 }; // for gtid
3264 }
3265
3266 // Print out __kmp_threads array.
3267 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3268 if ( __kmp_threads != NULL ) {
3269 int gtid;
3270 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3271 kmp_info_t const * thread = __kmp_threads[ gtid ];
3272 if ( thread != NULL ) {
3273 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3274 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3275 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3276 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3277 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3278 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3279 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3280 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3281#if OMP_40_ENABLED
3282 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3283#endif
3284 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3285 __kmp_printf( "\n" );
3286 __kmp_print_structure_team_accum( list, thread->th.th_team );
3287 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3288 }; // if
3289 }; // for gtid
3290 } else {
3291 __kmp_printf( "Threads array is not allocated.\n" );
3292 }; // if
3293
3294 // Print out __kmp_root array.
3295 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3296 if ( __kmp_root != NULL ) {
3297 int gtid;
3298 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3299 kmp_root_t const * root = __kmp_root[ gtid ];
3300 if ( root != NULL ) {
3301 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3302 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3303 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3304 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3305 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3306 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3307 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3308 __kmp_printf( "\n" );
3309 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3310 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3311 }; // if
3312 }; // for gtid
3313 } else {
3314 __kmp_printf( "Ubers array is not allocated.\n" );
3315 }; // if
3316
3317 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3318 while ( list->next != NULL ) {
3319 kmp_team_p const * team = list->entry;
3320 int i;
3321 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3322 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3323 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3324 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3325 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3326 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3327 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3328 __kmp_printf( " Thread %2d: ", i );
3329 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3330 }; // for i
3331 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3332 __kmp_printf( "\n" );
3333 list = list->next;
3334 }; // while
3335
3336 // Print out __kmp_thread_pool and __kmp_team_pool.
3337 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3338 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3339 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3340 __kmp_printf( "\n" );
3341
3342 // Free team list.
3343 while ( list != NULL ) {
3344 kmp_team_list_item_t * item = list;
3345 list = list->next;
3346 KMP_INTERNAL_FREE( item );
3347 }; // while
3348
3349}
3350
3351#endif
3352
3353
3354//---------------------------------------------------------------------------
3355// Stuff for per-thread fast random number generator
3356// Table of primes
3357
3358static const unsigned __kmp_primes[] = {
3359 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3360 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3361 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3362 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3363 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3364 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3365 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3366 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3367 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3368 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3369 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3370 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3371 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3372 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3373 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3374 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3375};
3376
3377//---------------------------------------------------------------------------
3378// __kmp_get_random: Get a random number using a linear congruential method.
3379
3380unsigned short
3381__kmp_get_random( kmp_info_t * thread )
3382{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003383 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003384 unsigned short r = x>>16;
3385
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003386 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003387
3388 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3389 thread->th.th_info.ds.ds_tid, r) );
3390
3391 return r;
3392}
3393//--------------------------------------------------------
3394// __kmp_init_random: Initialize a random number generator
3395
3396void
3397__kmp_init_random( kmp_info_t * thread )
3398{
3399 unsigned seed = thread->th.th_info.ds.ds_tid;
3400
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003401 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3402 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3403 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003404}
3405
3406
3407#if KMP_OS_WINDOWS
3408/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3409static int
3410__kmp_reclaim_dead_roots(void) {
3411 int i, r = 0;
3412
3413 for(i = 0; i < __kmp_threads_capacity; ++i) {
3414 if( KMP_UBER_GTID( i ) &&
3415 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3416 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots that died in a non-active state
3417 r += __kmp_unregister_root_other_thread(i);
3418 }
3419 }
3420 return r;
3421}
3422#endif
3423
3424/*
3425 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3426 free entries generated.
3427
3428 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3429 already dead.
3430
3431 On all platforms, expansion is attempted on the arrays __kmp_threads and __kmp_root, with appropriate
3432 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3433 __kmp_tp_capacity, if threadprivate cache array has been created.
3434 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3435
3436 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3437 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3438 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3439 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3440 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3441 as many free slots as possible up to nWish.
3442
3443 If any argument is negative, the behavior is undefined.
3444*/
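/*
 An illustrative example (numbers invented for clarity): with __kmp_threads_capacity == 4 and an
 effective maximum of 8, the call __kmp_expand_threads(6, 2) cannot create the 6 slots wished for
 (only 8 - 4 = 4 are possible) but can create the 2 that are needed, so the capacity is doubled
 to 8 and the function returns 4.
*/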
3445static int
3446__kmp_expand_threads(int nWish, int nNeed) {
3447 int added = 0;
3448 int old_tp_cached;
3449 int __kmp_actual_max_nth;
3450
3451 if(nNeed > nWish) /* normalize the arguments */
3452 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003453#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003454/* only for Windows static library */
3455 /* reclaim array entries for root threads that are already dead */
3456 added = __kmp_reclaim_dead_roots();
3457
3458 if(nNeed) {
3459 nNeed -= added;
3460 if(nNeed < 0)
3461 nNeed = 0;
3462 }
3463 if(nWish) {
3464 nWish -= added;
3465 if(nWish < 0)
3466 nWish = 0;
3467 }
3468#endif
3469 if(nWish <= 0)
3470 return added;
3471
3472 while(1) {
3473 int nTarget;
3474 int minimumRequiredCapacity;
3475 int newCapacity;
3476 kmp_info_t **newThreads;
3477 kmp_root_t **newRoot;
3478
3479 //
3480 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3481 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3482 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3483 // become > __kmp_max_nth in one of two ways:
3484 //
3485 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3486 // may not be reused by another thread, so we may need to increase
3487 // __kmp_threads_capacity to __kmp_max_nth + 1.
3488 //
3489 // 2) New foreign root(s) are encountered. We always register new
3490 // foreign roots. This may cause a smaller # of threads to be
3491 // allocated at subsequent parallel regions, but the worker threads
3492 // hang around (and eventually go to sleep) and need slots in the
3493 // __kmp_threads[] array.
3494 //
3495 // Anyway, that is the reason for moving the check to see if
3496 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3497 // instead of having it performed here. -BB
3498 //
3499 old_tp_cached = __kmp_tp_cached;
3500 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3501 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3502
3503 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3504 nTarget = nWish;
3505 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3506 /* can't fulfil nWish, so try nNeed */
3507 if(nNeed) {
3508 nTarget = nNeed;
3509 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3510 /* possible expansion too small -- give up */
3511 break;
3512 }
3513 } else {
3514 /* best-effort */
3515 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3516 if(!nTarget) {
3517 /* can't expand at all -- give up */
3518 break;
3519 }
3520 }
3521 }
3522 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3523
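        // The loop below doubles the capacity until it covers minimumRequiredCapacity, e.g.
        // 16 -> 32 -> 64 for a requirement of 40; once the current value exceeds half of
        // __kmp_actual_max_nth it is clamped to __kmp_actual_max_nth instead.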
3524 newCapacity = __kmp_threads_capacity;
3525 do{
3526 newCapacity =
3527 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3528 (newCapacity << 1) :
3529 __kmp_actual_max_nth;
3530 } while(newCapacity < minimumRequiredCapacity);
3531 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3532 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003533 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3534 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003535 memset(newThreads + __kmp_threads_capacity, 0,
3536 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3537 memset(newRoot + __kmp_threads_capacity, 0,
3538 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3539
3540 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3541 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3542 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3543 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3544 of a double-check pair.
3545 */
3546 __kmp_free(newThreads);
3547 continue; /* start over and try again */
3548 }
3549 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3550 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3551 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3552 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3553 __kmp_free(newThreads);
3554 continue; /* start over and try again */
3555 } else {
3556 /* success */
3557 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated.
3558 //
3559 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3560 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3561 added += newCapacity - __kmp_threads_capacity;
3562 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3563 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003564 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003565 }
3566 }
3567 return added;
3568}
3569
3570/* register the current thread as a root thread and obtain our gtid */
3571/* we must have the __kmp_initz_lock held at this point */
3572/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
3573int
3574__kmp_register_root( int initial_thread )
3575{
3576 kmp_info_t *root_thread;
3577 kmp_root_t *root;
3578 int gtid;
3579 int capacity;
3580 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3581 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3582 KMP_MB();
3583
3584
3585 /*
3586 2007-03-02:
3587
3588 If initial thread did not invoke OpenMP RTL yet, and this thread is not an initial one,
3589 "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
3590 return false (that means there is at least one empty slot in __kmp_threads array), but it
3591 is possible the only free slot is #0, which is reserved for initial thread and so cannot be
3592 used for this one. The following code works around this bug.
3593
3594 However, the right solution seems to be not to reserve slot #0 for the initial thread, because:
3595 (1) there is no magic in slot #0,
3596 (2) we cannot detect the initial thread reliably (the first thread which does serial
3597 initialization may not be the real initial thread).
3598 */
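    /* Concretely: when slot #0 is still empty and the registering thread is not the initial one,
       the local capacity is decremented so that slot #0 is not counted as available; the thread
       then either expands the arrays below or reports the "cannot register new thread" error,
       rather than passing the capacity check while no usable slot actually exists. */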
3599 capacity = __kmp_threads_capacity;
3600 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3601 -- capacity;
3602 }; // if
3603
3604 /* see if there are too many threads */
3605 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3606 if ( __kmp_tp_cached ) {
3607 __kmp_msg(
3608 kmp_ms_fatal,
3609 KMP_MSG( CantRegisterNewThread ),
3610 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3611 KMP_HNT( PossibleSystemLimitOnThreads ),
3612 __kmp_msg_null
3613 );
3614 }
3615 else {
3616 __kmp_msg(
3617 kmp_ms_fatal,
3618 KMP_MSG( CantRegisterNewThread ),
3619 KMP_HNT( SystemLimitOnThreads ),
3620 __kmp_msg_null
3621 );
3622 }
3623 }; // if
3624
3625 /* find an available thread slot */
3626 /* Don't reassign the zero slot since we need that to only be used by initial
3627 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003628 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3629 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003630 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3631 KMP_ASSERT( gtid < __kmp_threads_capacity );
3632
3633 /* update global accounting */
3634 __kmp_all_nth ++;
3635 TCW_4(__kmp_nth, __kmp_nth + 1);
3636
3637 //
3638 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3639 // for low numbers of procs, and method #2 (keyed API call) for higher
3640 // numbers of procs.
3641 //
3642 if ( __kmp_adjust_gtid_mode ) {
3643 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3644 if ( TCR_4(__kmp_gtid_mode) != 2) {
3645 TCW_4(__kmp_gtid_mode, 2);
3646 }
3647 }
3648 else {
3649 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3650 TCW_4(__kmp_gtid_mode, 1);
3651 }
3652 }
3653 }
3654
3655#ifdef KMP_ADJUST_BLOCKTIME
3656 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003657 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003658 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3659 if ( __kmp_nth > __kmp_avail_proc ) {
3660 __kmp_zero_bt = TRUE;
3661 }
3662 }
3663#endif /* KMP_ADJUST_BLOCKTIME */
3664
3665 /* setup this new hierarchy */
3666 if( ! ( root = __kmp_root[gtid] )) {
3667 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3668 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3669 }
3670
3671 __kmp_initialize_root( root );
3672
3673 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003674 if( root->r.r_uber_thread ) {
3675 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003676 } else {
3677 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3678 if ( __kmp_storage_map ) {
3679 __kmp_print_thread_storage_map( root_thread, gtid );
3680 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003681 root_thread->th.th_info .ds.ds_gtid = gtid;
3682 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003683 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003684 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003685 }
3686 #if USE_FAST_MEMORY
3687 __kmp_initialize_fast_memory( root_thread );
3688 #endif /* USE_FAST_MEMORY */
3689
3690 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003691 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003692 __kmp_initialize_bget( root_thread );
3693 #endif
3694 __kmp_init_random( root_thread ); // Initialize random number generator
3695 }
3696
3697 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003698 if( ! root_thread->th.th_serial_team ) {
3699 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003700 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003701
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003702 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003703#if OMPT_SUPPORT
3704 0, // root parallel id
3705#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003706#if OMP_40_ENABLED
3707 proc_bind_default,
3708#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003709 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003710 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003712 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003713 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003714 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003715
3716 /* drop root_thread into place */
3717 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3718
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003719 root->r.r_root_team->t.t_threads[0] = root_thread;
3720 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3721 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3722 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
3723 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003724
3725 /* initialize the thread, get it ready to go */
3726 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003727 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003728
3729 /* prepare the master thread for get_gtid() */
3730 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003731
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003732#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003733 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003734#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003735
Jim Cownie5e8470a2013-09-27 10:38:44 +00003736 #ifdef KMP_TDATA_GTID
3737 __kmp_gtid = gtid;
3738 #endif
3739 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3740 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003741
3742 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3743 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003744 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003745 KMP_INIT_BARRIER_STATE ) );
3746 { // Initialize barrier data.
3747 int b;
3748 for ( b = 0; b < bs_last_barrier; ++ b ) {
3749 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003750#if USE_DEBUGGER
3751 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3752#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003753 }; // for
3754 }
3755 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3756
Alp Toker763b9392014-02-28 09:42:41 +00003757#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003758# if OMP_40_ENABLED
3759 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3760 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3761 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3762 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3763# endif
3764
Jim Cownie5e8470a2013-09-27 10:38:44 +00003765 if ( TCR_4(__kmp_init_middle) ) {
3766 __kmp_affinity_set_init_mask( gtid, TRUE );
3767 }
Alp Toker763b9392014-02-28 09:42:41 +00003768#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003769
3770 __kmp_root_counter ++;
3771
3772 KMP_MB();
3773 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3774
3775 return gtid;
3776}
3777
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003778#if KMP_NESTED_HOT_TEAMS
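// Recursively frees the nested hot teams kept by 'thr' at the given nesting level
// and below; the master thread at each level is not freed. Returns the number of
// __kmp_threads entries released, which the caller adds to its own count.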
3779static int
3780__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3781{
3782 int i, n, nth;
3783 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3784 if( !hot_teams || !hot_teams[level].hot_team ) {
3785 return 0;
3786 }
3787 KMP_DEBUG_ASSERT( level < max_level );
3788 kmp_team_t *team = hot_teams[level].hot_team;
3789 nth = hot_teams[level].hot_team_nth;
3790 n = nth - 1; // master is not freed
3791 if( level < max_level - 1 ) {
3792 for( i = 0; i < nth; ++i ) {
3793 kmp_info_t *th = team->t.t_threads[i];
3794 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3795 if( i > 0 && th->th.th_hot_teams ) {
3796 __kmp_free( th->th.th_hot_teams );
3797 th->th.th_hot_teams = NULL;
3798 }
3799 }
3800 }
3801 __kmp_free_team( root, team, NULL );
3802 return n;
3803}
3804#endif
3805
Jim Cownie5e8470a2013-09-27 10:38:44 +00003806/* Resets a root thread and clears its root and hot teams.
3807 Returns the number of __kmp_threads entries directly and indirectly freed.
3808*/
3809static int
3810__kmp_reset_root(int gtid, kmp_root_t *root)
3811{
3812 kmp_team_t * root_team = root->r.r_root_team;
3813 kmp_team_t * hot_team = root->r.r_hot_team;
3814 int n = hot_team->t.t_nproc;
3815 int i;
3816
3817 KMP_DEBUG_ASSERT( ! root->r.r_active );
3818
3819 root->r.r_root_team = NULL;
3820 root->r.r_hot_team = NULL;
3821 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before calling
3822 // __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003823 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3824#if KMP_NESTED_HOT_TEAMS
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003825 if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003826 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3827 kmp_info_t *th = hot_team->t.t_threads[i];
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003828 if( __kmp_hot_teams_max_level > 1 ) {
3829 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3830 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003831 if( th->th.th_hot_teams ) {
3832 __kmp_free( th->th.th_hot_teams );
3833 th->th.th_hot_teams = NULL;
3834 }
3835 }
3836 }
3837#endif
3838 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003839
Jim Cownie5e8470a2013-09-27 10:38:44 +00003840 //
3841 // Before we can reap the thread, we need to make certain that all
3842 // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
3843 //
3844 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3845 __kmp_wait_to_unref_task_teams();
3846 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003847
3848 #if KMP_OS_WINDOWS
3849 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3850 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3851 (LPVOID)&(root->r.r_uber_thread->th),
3852 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3853 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3854 #endif /* KMP_OS_WINDOWS */
3855
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003856#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003857 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003858 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3859 int gtid = __kmp_get_gtid();
3860 __ompt_thread_end(ompt_thread_initial, gtid);
3861 }
3862#endif
3863
Jim Cownie5e8470a2013-09-27 10:38:44 +00003864 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3865 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3866
3867 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3868 root->r.r_uber_thread = NULL;
3869 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003870 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003871
3872 return n;
3873}
3874
3875void
3876__kmp_unregister_root_current_thread( int gtid )
3877{
Jim Cownie77c2a632014-09-03 11:34:33 +00003878 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003879 /* this lock should be ok, since unregister_root_current_thread is never called during
3880 * an abort, only during a normal close. furthermore, if you have the
3881 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003882
3883 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3884 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3885 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3886 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3887 return;
3888 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003889 kmp_root_t *root = __kmp_root[gtid];
3890
Jim Cownie5e8470a2013-09-27 10:38:44 +00003891 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3892 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3893 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3894 KMP_ASSERT( root->r.r_active == FALSE );
3895
Jim Cownie5e8470a2013-09-27 10:38:44 +00003896
3897 KMP_MB();
3898
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003899#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003900 kmp_info_t * thread = __kmp_threads[gtid];
3901 kmp_team_t * team = thread->th.th_team;
3902 kmp_task_team_t * task_team = thread->th.th_task_team;
3903
3904 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003905 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3906#if OMPT_SUPPORT
3907 // the runtime is shutting down so we won't report any events
3908 thread->th.ompt_thread_info.state = ompt_state_undefined;
3909#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003910 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003911 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003912#endif
3913
Jim Cownie5e8470a2013-09-27 10:38:44 +00003914 __kmp_reset_root(gtid, root);
3915
3916 /* free up this thread slot */
3917 __kmp_gtid_set_specific( KMP_GTID_DNE );
3918#ifdef KMP_TDATA_GTID
3919 __kmp_gtid = KMP_GTID_DNE;
3920#endif
3921
3922 KMP_MB();
3923 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3924
3925 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3926}
3927
Jonathan Peyton2321d572015-06-08 19:25:25 +00003928#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003929/* __kmp_forkjoin_lock must be already held
3930 Unregisters a root thread that is not the current thread. Returns the number of
3931 __kmp_threads entries freed as a result.
3932 */
3933static int
3934__kmp_unregister_root_other_thread( int gtid )
3935{
3936 kmp_root_t *root = __kmp_root[gtid];
3937 int r;
3938
3939 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3940 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3941 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3942 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3943 KMP_ASSERT( root->r.r_active == FALSE );
3944
3945 r = __kmp_reset_root(gtid, root);
3946 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3947 return r;
3948}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003949#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003950
Jim Cownie5e8470a2013-09-27 10:38:44 +00003951#if KMP_DEBUG
3952void __kmp_task_info() {
3953
3954 kmp_int32 gtid = __kmp_entry_gtid();
3955 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3956 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003957 kmp_team_t *steam = this_thr->th.th_serial_team;
3958 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003959
3960 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3961 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3962}
3963#endif // KMP_DEBUG
3964
Jim Cownie5e8470a2013-09-27 10:38:44 +00003965/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003966 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003967 * initialization of features as much as possible */
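/* __kmp_initialize_info() wires a (new or recycled) thread into 'team' at position
 * 'tid': it caches the team pointers and sizes, initializes the implicit task,
 * sets up the per-thread dispatch buffers, and allocates the private common table
 * and the task-state stack on first use. */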
3968static void
3969__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3970{
3971 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3972 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003973 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003974 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003975 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003976 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003977 KMP_DEBUG_ASSERT( team->t.t_threads );
3978 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3979 KMP_DEBUG_ASSERT( master );
3980 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003981
3982 KMP_MB();
3983
3984 TCW_SYNC_PTR(this_thr->th.th_team, team);
3985
3986 this_thr->th.th_info.ds.ds_tid = tid;
3987 this_thr->th.th_set_nproc = 0;
3988#if OMP_40_ENABLED
3989 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00003990# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00003991 this_thr->th.th_new_place = this_thr->th.th_current_place;
3992# endif
3993#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003994 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003995
3996 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003997 this_thr->th.th_team_nproc = team->t.t_nproc;
3998 this_thr->th.th_team_master = master;
3999 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004000 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4001
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004002 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004003
4004 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4005 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4006
4007 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4008
4009 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4010 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4011 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004012
4013 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004014 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004015
4016 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004017
4018#ifdef BUILD_TV
4019 this_thr->th.th_local.tv_data = 0;
4020#endif
4021
4022 if ( ! this_thr->th.th_pri_common ) {
4023 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4024 if ( __kmp_storage_map ) {
4025 __kmp_print_storage_map_gtid(
4026 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4027 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4028 );
4029 }; // if
4030 this_thr->th.th_pri_head = NULL;
4031 }; // if
4032
4033 /* Initialize dynamic dispatch */
4034 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004035 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004036 /*
4037 * Use team max_nproc since this will never change for the team.
4038 */
4039 size_t disp_size = sizeof( dispatch_private_info_t ) *
Jonathan Peyton067325f2016-05-31 19:01:15 +00004040 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004041 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4042 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004043 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004044 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4045
4046 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004047#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00004048 dispatch->th_doacross_buf_idx = 0;
4049#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004050 if( ! dispatch->th_disp_buffer ) {
4051 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004052
4053 if ( __kmp_storage_map ) {
4054 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
Jonathan Peyton067325f2016-05-31 19:01:15 +00004055 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
Jim Cownie5e8470a2013-09-27 10:38:44 +00004056 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4057 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4058 gtid, team->t.t_id, gtid );
4059 }
4060 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004061 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004062 }
4063
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004064 dispatch->th_dispatch_pr_current = 0;
4065 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004066
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004067 dispatch->th_deo_fcn = 0; /* ORDERED */
4068 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004069 }
4070
4071 this_thr->th.th_next_pool = NULL;
4072
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004073 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004074 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004075 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4076 this_thr->th.th_task_state_top = 0;
4077 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004078 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4079 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004080 }
4081
Jim Cownie5e8470a2013-09-27 10:38:44 +00004082 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4083 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4084
4085 KMP_MB();
4086}
4087
4088
4089/* allocate a new thread for the requesting team. this is only called from within a
4090 * forkjoin critical section. we will first try to get an available thread from the
4091 * thread pool. if none is available, we will fork a new one assuming we are able
4092 * to create a new one. this should be assured, as the caller should check on this
4093 * first.
4094 */
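/* Note: the caller is expected to have verified capacity while holding the forkjoin
 * lock before calling; the KMP_ASSERTs on __kmp_nth and __kmp_threads_capacity in
 * the fork path below only re-check that precondition. */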
4095kmp_info_t *
4096__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4097{
4098 kmp_team_t *serial_team;
4099 kmp_info_t *new_thr;
4100 int new_gtid;
4101
4102 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4103 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004104#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004105 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004106#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004107 KMP_MB();
4108
4109 /* first, try to get one from the thread pool */
4110 if ( __kmp_thread_pool ) {
4111
4112 new_thr = (kmp_info_t*)__kmp_thread_pool;
4113 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4114 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4115 __kmp_thread_pool_insert_pt = NULL;
4116 }
4117 TCW_4(new_thr->th.th_in_pool, FALSE);
4118 //
4119 // Don't touch th_active_in_pool or th_active.
4120 // The worker thread adjusts those flags as it sleeps/awakens.
4121 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122 __kmp_thread_pool_nth--;
4123
4124 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4125 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004126 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004127 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4128 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4129
4130 /* setup the thread structure */
4131 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4132 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4133
4134 TCW_4(__kmp_nth, __kmp_nth + 1);
4135
Jonathan Peyton54127982015-11-04 21:37:48 +00004136 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004137 new_thr->th.th_task_state_top = 0;
4138 new_thr->th.th_task_state_stack_sz = 4;
4139
Jim Cownie5e8470a2013-09-27 10:38:44 +00004140#ifdef KMP_ADJUST_BLOCKTIME
4141 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004142 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004143 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4144 if ( __kmp_nth > __kmp_avail_proc ) {
4145 __kmp_zero_bt = TRUE;
4146 }
4147 }
4148#endif /* KMP_ADJUST_BLOCKTIME */
4149
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004150#if KMP_DEBUG
4151 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
4152 int b;
4153 kmp_balign_t * balign = new_thr->th.th_bar;
4154 for( b = 0; b < bs_last_barrier; ++ b )
4155 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4156#endif
4157
Jim Cownie5e8470a2013-09-27 10:38:44 +00004158 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4159 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4160
4161 KMP_MB();
4162 return new_thr;
4163 }
4164
4165
4166 /* no, we'll fork a new one */
4167 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4168 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4169
4170 //
4171 // If this is the first worker thread the RTL is creating, then also
4172 // launch the monitor thread. We try to do this as early as possible.
4173 //
4174 if ( ! TCR_4( __kmp_init_monitor ) ) {
4175 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4176 if ( ! TCR_4( __kmp_init_monitor ) ) {
4177 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4178 TCW_4( __kmp_init_monitor, 1 );
4179 __kmp_create_monitor( & __kmp_monitor );
4180 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004181 #if KMP_OS_WINDOWS
4182 // AC: wait until monitor has started. This is a fix for CQ232808.
4183 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
4184 // work in between, then there is a high probability that the monitor thread starts after
4185 // the library has shut down. At shutdown it is too late to cope with the problem, because
4186 // when the master is in DllMain (process detach) the monitor has no chance to start
4187 // (it is blocked), and the master has no means to inform the monitor that the library has gone,
4188 // because all the memory which the monitor can access is going to be released/reset.
4189 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4190 KMP_YIELD( TRUE );
4191 }
4192 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4193 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004194 }
4195 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4196 }
4197
4198 KMP_MB();
4199 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4200 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4201 }
4202
4203 /* allocate space for it. */
4204 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4205
4206 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4207
4208 if ( __kmp_storage_map ) {
4209 __kmp_print_thread_storage_map( new_thr, new_gtid );
4210 }
4211
4212 /* add the reserve serialized team, initialized from the team's master thread */
4213 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004214 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004215 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004216
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004217 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004218 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004219#if OMPT_SUPPORT
4220 0, // root parallel id
4221#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004222#if OMP_40_ENABLED
4223 proc_bind_default,
4224#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004225 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004226 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004227 }
4228 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004229 serial_team->t.t_serialized = 0; // AC: this team is created in reserve, not for execution (it is unused for now).
4230 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004231 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4232 new_thr ) );
4233
4234 /* setup the thread structures */
4235 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4236
4237 #if USE_FAST_MEMORY
4238 __kmp_initialize_fast_memory( new_thr );
4239 #endif /* USE_FAST_MEMORY */
4240
4241 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004242 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004243 __kmp_initialize_bget( new_thr );
4244 #endif
4245
4246 __kmp_init_random( new_thr ); // Initialize random number generator
4247
4248 /* Initialize these only once when thread is grabbed for a team allocation */
4249 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4250 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4251
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004252 int b;
4253 kmp_balign_t * balign = new_thr->th.th_bar;
4254 for(b=0; b<bs_last_barrier; ++b) {
4255 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4256 balign[b].bb.team = NULL;
4257 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4258 balign[b].bb.use_oncore_barrier = 0;
4259 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004260
4261 new_thr->th.th_spin_here = FALSE;
4262 new_thr->th.th_next_waiting = 0;
4263
Alp Toker98758b02014-03-02 04:12:06 +00004264#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004265 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4266 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4267 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4268 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4269#endif
4270
4271 TCW_4(new_thr->th.th_in_pool, FALSE);
4272 new_thr->th.th_active_in_pool = FALSE;
4273 TCW_4(new_thr->th.th_active, TRUE);
4274
4275 /* adjust the global counters */
4276 __kmp_all_nth ++;
4277 __kmp_nth ++;
4278
4279 //
4280 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4281 // for low numbers of procs, and method #2 (keyed API call) for higher
4282 // numbers of procs.
4283 //
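    // The cutover point is __kmp_tls_gtid_min: once __kmp_all_nth reaches that value
    // the runtime switches to mode 2 (keyed/TLS lookup); below it, mode 1
    // (stack-pointer search) is kept.
    //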
4284 if ( __kmp_adjust_gtid_mode ) {
4285 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4286 if ( TCR_4(__kmp_gtid_mode) != 2) {
4287 TCW_4(__kmp_gtid_mode, 2);
4288 }
4289 }
4290 else {
4291 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4292 TCW_4(__kmp_gtid_mode, 1);
4293 }
4294 }
4295 }
4296
4297#ifdef KMP_ADJUST_BLOCKTIME
4298 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004299 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004300 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4301 if ( __kmp_nth > __kmp_avail_proc ) {
4302 __kmp_zero_bt = TRUE;
4303 }
4304 }
4305#endif /* KMP_ADJUST_BLOCKTIME */
4306
4307 /* actually fork it and create the new worker thread */
4308 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4309 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4310 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4311
Jim Cownie5e8470a2013-09-27 10:38:44 +00004312 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4313 KMP_MB();
4314 return new_thr;
4315}
4316
4317/*
4318 * reinitialize team for reuse.
4319 *
4320 * The hot team code calls this case at every fork barrier, so EPCC barrier
4321 * tests are extremely sensitive to changes in it, esp. writes to the team
4322 * struct, which cause a cache invalidation in all threads.
4323 *
4324 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4325 */
4326static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004327__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004328 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4329 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004330 KMP_DEBUG_ASSERT( team && new_icvs);
4331 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004332 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004333
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004334 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004335
Jim Cownie181b4bb2013-12-23 17:28:57 +00004336 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004337 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004338 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004339
4340 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4341 team->t.t_threads[0], team ) );
4342}
4343
Jim Cownie5e8470a2013-09-27 10:38:44 +00004344
4345/* initialize the team data structure
4346 * this assumes the t_threads and t_max_nproc are already set
4347 * also, we don't touch the arguments */
4348static void
4349__kmp_initialize_team(
4350 kmp_team_t * team,
4351 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004352 kmp_internal_control_t * new_icvs,
4353 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004354) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004355 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4356
Jim Cownie5e8470a2013-09-27 10:38:44 +00004357 /* verify */
4358 KMP_DEBUG_ASSERT( team );
4359 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4360 KMP_DEBUG_ASSERT( team->t.t_threads );
4361 KMP_MB();
4362
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004363 team->t.t_master_tid = 0; /* not needed */
4364 /* team->t.t_master_bar; not needed */
4365 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4366 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004367
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004368 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4369 team->t.t_next_pool = NULL;
4370 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004371
4372 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004373 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004374
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004375 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4376 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004377
4378#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004379 team->t.t_fp_control_saved = FALSE; /* not needed */
4380 team->t.t_x87_fpu_control_word = 0; /* not needed */
4381 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004382#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4383
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004384 team->t.t_construct = 0;
4385 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004386
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004387 team->t.t_ordered .dt.t_value = 0;
4388 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004389
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004390 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004391
4392#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004393 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004394#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004395 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004396
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004397 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004398
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004399 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004400
4401 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004402 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004403}
4404
Alp Toker98758b02014-03-02 04:12:06 +00004405#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004406/* Sets full mask for thread and returns old mask, no changes to structures. */
4407static void
4408__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4409{
4410 if ( KMP_AFFINITY_CAPABLE() ) {
4411 int status;
4412 if ( old_mask != NULL ) {
4413 status = __kmp_get_system_affinity( old_mask, TRUE );
4414 int error = errno;
4415 if ( status != 0 ) {
4416 __kmp_msg(
4417 kmp_ms_fatal,
4418 KMP_MSG( ChangeThreadAffMaskError ),
4419 KMP_ERR( error ),
4420 __kmp_msg_null
4421 );
4422 }
4423 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004424 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004425 }
4426}
4427#endif
4428
Alp Toker98758b02014-03-02 04:12:06 +00004429#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004430
4431//
4432// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4433 // It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004434// thread's partition, and binds each worker to a place in its partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004435// The master thread's partition should already include its current binding.
4436//
4437static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004438__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004439{
4440 //
4441 // Copy the master thread's place partition to the team struct
4442 //
4443 kmp_info_t *master_th = team->t.t_threads[0];
4444 KMP_DEBUG_ASSERT( master_th != NULL );
4445 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4446 int first_place = master_th->th.th_first_place;
4447 int last_place = master_th->th.th_last_place;
4448 int masters_place = master_th->th.th_current_place;
4449 team->t.t_first_place = first_place;
4450 team->t.t_last_place = last_place;
4451
4452 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4453 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4454 masters_place, first_place, last_place ) );
4455
4456 switch ( proc_bind ) {
4457
4458 case proc_bind_default:
4459 //
4460 // serial teams might have the proc_bind policy set to
4461 // proc_bind_default. It doesn't matter, as we don't
4462 // rebind the master thread for any proc_bind policy.
4463 //
4464 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4465 break;
4466
4467 case proc_bind_master:
4468 {
4469 int f;
4470 int n_th = team->t.t_nproc;
4471 for ( f = 1; f < n_th; f++ ) {
4472 kmp_info_t *th = team->t.t_threads[f];
4473 KMP_DEBUG_ASSERT( th != NULL );
4474 th->th.th_first_place = first_place;
4475 th->th.th_last_place = last_place;
4476 th->th.th_new_place = masters_place;
4477
4478 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4479 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4480 team->t.t_id, f, masters_place, first_place, last_place ) );
4481 }
4482 }
4483 break;
4484
4485 case proc_bind_close:
4486 {
4487 int f;
4488 int n_th = team->t.t_nproc;
4489 int n_places;
4490 if ( first_place <= last_place ) {
4491 n_places = last_place - first_place + 1;
4492 }
4493 else {
4494 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4495 }
4496 if ( n_th <= n_places ) {
4497 int place = masters_place;
4498 for ( f = 1; f < n_th; f++ ) {
4499 kmp_info_t *th = team->t.t_threads[f];
4500 KMP_DEBUG_ASSERT( th != NULL );
4501
4502 if ( place == last_place ) {
4503 place = first_place;
4504 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004505 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004506 place = 0;
4507 }
4508 else {
4509 place++;
4510 }
4511 th->th.th_first_place = first_place;
4512 th->th.th_last_place = last_place;
4513 th->th.th_new_place = place;
4514
4515 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4516 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4517 team->t.t_id, f, place, first_place, last_place ) );
4518 }
4519 }
4520 else {
4521 int S, rem, gap, s_count;
4522 S = n_th / n_places;
4523 s_count = 0;
4524 rem = n_th - ( S * n_places );
4525 gap = rem > 0 ? n_places/rem : n_places;
4526 int place = masters_place;
4527 int gap_ct = gap;
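                // Illustrative example: n_th = 10 threads over n_places = 4 places gives
                // S = 2, rem = 2, gap = 2, so the places receive 3, 2, 3 and 2 threads
                // (every 'gap'-th place takes one extra thread until 'rem' is exhausted).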
4528 for ( f = 0; f < n_th; f++ ) {
4529 kmp_info_t *th = team->t.t_threads[f];
4530 KMP_DEBUG_ASSERT( th != NULL );
4531
4532 th->th.th_first_place = first_place;
4533 th->th.th_last_place = last_place;
4534 th->th.th_new_place = place;
4535 s_count++;
4536
4537 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4538 // do nothing, add an extra thread to place on next iteration
4539 }
4540 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4541 // we added an extra thread to this place; move to next place
4542 if ( place == last_place ) {
4543 place = first_place;
4544 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004545 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004546 place = 0;
4547 }
4548 else {
4549 place++;
4550 }
4551 s_count = 0;
4552 gap_ct = 1;
4553 rem--;
4554 }
4555 else if (s_count == S) { // place full; don't add extra
4556 if ( place == last_place ) {
4557 place = first_place;
4558 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004559 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004560 place = 0;
4561 }
4562 else {
4563 place++;
4564 }
4565 gap_ct++;
4566 s_count = 0;
4567 }
4568
4569 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4570 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4571 team->t.t_id, f, th->th.th_new_place, first_place,
4572 last_place ) );
4573 }
4574 KMP_DEBUG_ASSERT( place == masters_place );
4575 }
4576 }
4577 break;
4578
4579 case proc_bind_spread:
4580 {
4581 int f;
4582 int n_th = team->t.t_nproc;
4583 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004584 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004585 if ( first_place <= last_place ) {
4586 n_places = last_place - first_place + 1;
4587 }
4588 else {
4589 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4590 }
4591 if ( n_th <= n_places ) {
4592 int place = masters_place;
4593 int S = n_places/n_th;
4594 int s_count, rem, gap, gap_ct;
4595 rem = n_places - n_th*S;
4596 gap = rem ? n_th/rem : 1;
4597 gap_ct = gap;
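                // Illustrative example: n_th = 4 threads over n_places = 10 places gives
                // S = 2, rem = 2, gap = 2; threads 0 and 2 receive 3-place sub-partitions
                // and threads 1 and 3 receive 2-place ones (3 + 2 + 3 + 2 = 10).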
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004598 thidx = n_th;
4599 if (update_master_only == 1)
4600 thidx = 1;
4601 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004602 kmp_info_t *th = team->t.t_threads[f];
4603 KMP_DEBUG_ASSERT( th != NULL );
4604
4605 th->th.th_first_place = place;
4606 th->th.th_new_place = place;
4607 s_count = 1;
4608 while (s_count < S) {
4609 if ( place == last_place ) {
4610 place = first_place;
4611 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004612 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004613 place = 0;
4614 }
4615 else {
4616 place++;
4617 }
4618 s_count++;
4619 }
4620 if (rem && (gap_ct == gap)) {
4621 if ( place == last_place ) {
4622 place = first_place;
4623 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004624 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004625 place = 0;
4626 }
4627 else {
4628 place++;
4629 }
4630 rem--;
4631 gap_ct = 0;
4632 }
4633 th->th.th_last_place = place;
4634 gap_ct++;
4635
4636 if ( place == last_place ) {
4637 place = first_place;
4638 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004639 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004640 place = 0;
4641 }
4642 else {
4643 place++;
4644 }
4645
4646 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4647 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4648 team->t.t_id, f, th->th.th_new_place,
4649 th->th.th_first_place, th->th.th_last_place ) );
4650 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004651 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004652 }
4653 else {
4654 int S, rem, gap, s_count;
4655 S = n_th / n_places;
4656 s_count = 0;
4657 rem = n_th - ( S * n_places );
4658 gap = rem > 0 ? n_places/rem : n_places;
4659 int place = masters_place;
4660 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004661 thidx = n_th;
4662 if (update_master_only == 1)
4663 thidx = 1;
4664 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004665 kmp_info_t *th = team->t.t_threads[f];
4666 KMP_DEBUG_ASSERT( th != NULL );
4667
4668 th->th.th_first_place = place;
4669 th->th.th_last_place = place;
4670 th->th.th_new_place = place;
4671 s_count++;
4672
4673 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4674 // do nothing, add an extra thread to place on next iteration
4675 }
4676 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4677 // we added an extra thread to this place; move on to next place
4678 if ( place == last_place ) {
4679 place = first_place;
4680 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004681 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004682 place = 0;
4683 }
4684 else {
4685 place++;
4686 }
4687 s_count = 0;
4688 gap_ct = 1;
4689 rem--;
4690 }
4691 else if (s_count == S) { // place is full; don't add extra thread
4692 if ( place == last_place ) {
4693 place = first_place;
4694 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004695 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004696 place = 0;
4697 }
4698 else {
4699 place++;
4700 }
4701 gap_ct++;
4702 s_count = 0;
4703 }
4704
4705 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4706 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4707 team->t.t_id, f, th->th.th_new_place,
4708 th->th.th_first_place, th->th.th_last_place) );
4709 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004710 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004711 }
4712 }
4713 break;
4714
4715 default:
4716 break;
4717 }
4718
4719 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4720}
4721
Alp Toker98758b02014-03-02 04:12:06 +00004722#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004723
4724/* allocate a new team data structure to use. take one off of the free pool if available */
4725kmp_team_t *
4726__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004727#if OMPT_SUPPORT
4728 ompt_parallel_id_t ompt_parallel_id,
4729#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004730#if OMP_40_ENABLED
4731 kmp_proc_bind_t new_proc_bind,
4732#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004733 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004734 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004735{
Jonathan Peyton45be4502015-08-11 21:36:41 +00004736 KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004737 int f;
4738 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004739 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004740 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004741
4742 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4743 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4744 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4745 KMP_MB();
4746
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004747#if KMP_NESTED_HOT_TEAMS
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004748 kmp_hot_team_ptr_t *hot_teams;
4749 if( master ) {
4750 team = master->th.th_team;
4751 level = team->t.t_active_level;
4752 if( master->th.th_teams_microtask ) { // in teams construct?
4753 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4754 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4755 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
4756 ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise
4757 }
4758 }
4759 hot_teams = master->th.th_hot_teams;
4760 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4761 { // hot team has already been allocated for given level
4762 use_hot_team = 1;
4763 } else {
4764 use_hot_team = 0;
4765 }
4766 }
4767#endif
4768 // Optimization to use a "hot" team
4769 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004770 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004771#if KMP_NESTED_HOT_TEAMS
4772 team = hot_teams[level].hot_team;
4773#else
4774 team = root->r.r_hot_team;
4775#endif
4776#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004777 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004778 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4779 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004780 }
4781#endif
4782
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004783 // Has the number of threads changed?
4784 /* Let's assume the most common case is that the number of threads is unchanged, and
4785 put that case first. */
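        // Three cases follow: the team size is unchanged (just refresh ICVs, schedule and
        // places), the hot team is larger than requested (release the surplus workers), or
        // it is smaller (grow the arrays if needed and allocate additional workers).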
4786 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4787 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004788 // This case can mean that omp_set_num_threads() was called and the hot team size
4789 // was already reduced, so we check the special flag
4790 if ( team->t.t_size_changed == -1 ) {
4791 team->t.t_size_changed = 1;
4792 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004793 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004794 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004795
4796 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004797 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004798 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4799 team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004800 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004801
4802 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4803
4804 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4805 0, team->t.t_threads[0], team ) );
4806 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4807
4808#if OMP_40_ENABLED
4809# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004810 if ( ( team->t.t_size_changed == 0 )
4811 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004812 if (new_proc_bind == proc_bind_spread) {
4813 __kmp_partition_places(team, 1); // add flag to update only master for spread
4814 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004815 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4816 team->t.t_id, new_proc_bind, team->t.t_first_place,
4817 team->t.t_last_place ) );
4818 }
4819 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004820 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004821 __kmp_partition_places( team );
4822 }
4823# else
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004824 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004825# endif /* KMP_AFFINITY_SUPPORTED */
4826#endif /* OMP_40_ENABLED */
4827 }
4828 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004829 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4830
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004831 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004832#if KMP_NESTED_HOT_TEAMS
4833 if( __kmp_hot_teams_mode == 0 ) {
4834 // AC: saved number of threads should correspond to team's value in this mode,
4835 // can be bigger in mode 1, when hot team has some threads in reserve
4836 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4837 hot_teams[level].hot_team_nth = new_nproc;
4838#endif // KMP_NESTED_HOT_TEAMS
4839 /* release the extra threads we don't need any more */
4840 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4841 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004842 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4843 // When decreasing team size, threads no longer in the team should unref task team.
4844 team->t.t_threads[f]->th.th_task_team = NULL;
4845 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004846 __kmp_free_thread( team->t.t_threads[ f ] );
4847 team->t.t_threads[ f ] = NULL;
4848 }
4849#if KMP_NESTED_HOT_TEAMS
4850 } // (__kmp_hot_teams_mode == 0)
4851#endif // KMP_NESTED_HOT_TEAMS
4852 team->t.t_nproc = new_nproc;
4853 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004854 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4855 team->t.t_sched.chunk != new_icvs->sched.chunk)
4856 team->t.t_sched = new_icvs->sched;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004857 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004858
Jim Cownie5e8470a2013-09-27 10:38:44 +00004859 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004860 for(f = 0; f < new_nproc; ++f) {
4861 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004862 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004863 // restore the current task state of the master thread: should be the implicit task
4864 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4865 0, team->t.t_threads[0], team ) );
4866
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004867 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004868
4869#ifdef KMP_DEBUG
4870 for ( f = 0; f < team->t.t_nproc; f++ ) {
4871 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4872 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4873 }
4874#endif
4875
4876#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004877 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00004878# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004879 __kmp_partition_places( team );
4880# endif
4881#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004882 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004883 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004884#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004885 kmp_affin_mask_t *old_mask;
4886 if ( KMP_AFFINITY_CAPABLE() ) {
4887 KMP_CPU_ALLOC(old_mask);
4888 }
4889#endif
4890
4891 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4892
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004893 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004894
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004895#if KMP_NESTED_HOT_TEAMS
4896 int avail_threads = hot_teams[level].hot_team_nth;
4897 if( new_nproc < avail_threads )
4898 avail_threads = new_nproc;
4899 kmp_info_t **other_threads = team->t.t_threads;
4900 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4901 // Adjust barrier data of reserved threads (if any) of the team
4902 // Other data will be set in __kmp_initialize_info() below.
4903 int b;
4904 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4905 for ( b = 0; b < bs_last_barrier; ++ b ) {
4906 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4907 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004908#if USE_DEBUGGER
4909 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4910#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004911 }
4912 }
4913 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4914 // we have all needed threads in reserve, no need to allocate any
4915 // this is only possible in mode 1; we cannot have reserved threads in mode 0
4916 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4917 team->t.t_nproc = new_nproc; // just get reserved threads involved
4918 } else {
4919 // we may have some threads in reserve, but not enough
4920 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4921 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4922#endif // KMP_NESTED_HOT_TEAMS
4923 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004924 /* reallocate larger arrays */
4925 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004926 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004927 }
4928
Alp Toker98758b02014-03-02 04:12:06 +00004929#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004930 /* Temporarily set full mask for master thread before
4931 creation of workers. The reason is that workers inherit
4932 the affinity from master, so if a lot of workers are
4933 created on a single core quickly, they don't get
4934 a chance to set their own affinity for a long time.
4935 */
4936 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4937#endif
4938
4939 /* allocate new threads for the hot team */
4940 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4941 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4942 KMP_DEBUG_ASSERT( new_worker );
4943 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004944
Jonathan Peytond26e2132015-09-10 18:44:30 +00004945 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004946 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4947 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4948 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4949
4950 { // Initialize barrier data for new threads.
4951 int b;
4952 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004953 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004954 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004955 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004956#if USE_DEBUGGER
4957 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4958#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004959 }
4960 }
4961 }
4962
Alp Toker98758b02014-03-02 04:12:06 +00004963#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004964 if ( KMP_AFFINITY_CAPABLE() ) {
4965 /* Restore initial master thread's affinity mask */
4966 __kmp_set_system_affinity( old_mask, TRUE );
4967 KMP_CPU_FREE(old_mask);
4968 }
4969#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004970#if KMP_NESTED_HOT_TEAMS
4971 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
4972#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004973 /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00004974 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004975 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004976
Jonathan Peytone03b62f2015-10-08 18:49:40 +00004977 /* reinitialize the threads */
4978 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00004979 for (f=0; f < team->t.t_nproc; ++f)
4980 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
4981 if (level) { // set th_task_state for new threads in nested hot team
4982 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00004983 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00004984 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
4985 for (f=old_nproc; f < team->t.t_nproc; ++f)
4986 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004987 }
Jonathan Peyton54127982015-11-04 21:37:48 +00004988 else { // set th_task_state for new threads in non-nested hot team
4989 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
4990 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004991 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004992 }
4993
Jim Cownie5e8470a2013-09-27 10:38:44 +00004994#ifdef KMP_DEBUG
4995 for ( f = 0; f < team->t.t_nproc; ++ f ) {
4996 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4997 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4998 }
4999#endif
5000
5001#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00005002 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00005003# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005004 __kmp_partition_places( team );
5005# endif
5006#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005007 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005008
5009#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005010 kmp_info_t *master = team->t.t_threads[0];
5011 if( master->th.th_teams_microtask ) {
5012 for( f = 1; f < new_nproc; ++f ) {
5013 // propagate teams construct specific info to workers
5014 kmp_info_t *thr = team->t.t_threads[f];
5015 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5016 thr->th.th_teams_level = master->th.th_teams_level;
5017 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005018 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005019 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005020#endif /* OMP_40_ENABLED */
5021#if KMP_NESTED_HOT_TEAMS
5022 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005023 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005024 for( f = 1; f < new_nproc; ++f ) {
5025 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005026 int b;
5027 kmp_balign_t * balign = thr->th.th_bar;
5028 for( b = 0; b < bs_last_barrier; ++ b ) {
5029 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5030 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005031#if USE_DEBUGGER
5032 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5033#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005034 }
5035 }
5036 }
5037#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005038
5039 /* reallocate space for arguments if necessary */
5040 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005041 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005042 //
5043 // The hot team re-uses the previous task team,
5044 // if untouched during the previous release->gather phase.
5045 //
5046
5047 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5048
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005049#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005050 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005051 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5052 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005053 }
5054#endif
5055
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005056#if OMPT_SUPPORT
5057 __ompt_team_assign_id(team, ompt_parallel_id);
5058#endif
5059
Jim Cownie5e8470a2013-09-27 10:38:44 +00005060 KMP_MB();
5061
5062 return team;
5063 }
5064
5065 /* next, let's try to take one from the team pool */
5066 KMP_MB();
5067 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5068 {
5069 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5070 if ( team->t.t_max_nproc >= max_nproc ) {
5071 /* take this team from the team pool */
5072 __kmp_team_pool = team->t.t_next_pool;
5073
5074 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005075 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005076
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005077 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5078 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5079 team->t.t_task_team[0] = NULL;
5080 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005081
5082 /* reallocate space for arguments if necessary */
5083 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005084 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005085
5086 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5087 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5088 { // Initialize barrier data.
5089 int b;
5090 for ( b = 0; b < bs_last_barrier; ++ b) {
5091 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005092#if USE_DEBUGGER
5093 team->t.t_bar[ b ].b_master_arrived = 0;
5094 team->t.t_bar[ b ].b_team_arrived = 0;
5095#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005096 }
5097 }
5098
5099#if OMP_40_ENABLED
5100 team->t.t_proc_bind = new_proc_bind;
5101#endif
5102
5103 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005104
5105#if OMPT_SUPPORT
5106 __ompt_team_assign_id(team, ompt_parallel_id);
5107#endif
5108
Jim Cownie5e8470a2013-09-27 10:38:44 +00005109 KMP_MB();
5110
5111 return team;
5112 }
5113
5114 /* reap team if it is too small, then loop back and check the next one */
5115 /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
5116 /* TODO: Use technique to find the right size hot-team, don't reap them */
5117 team = __kmp_reap_team( team );
5118 __kmp_team_pool = team;
5119 }
5120
5121 /* nothing available in the pool, no matter, make a new team! */
5122 KMP_MB();
5123 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5124
5125 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005126 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005127 /* NOTE well, for some reason allocating one big buffer and dividing it
5128 * up seems to really hurt performance a lot on the P4, so, let's not use
5129 * this... */
5130 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005131
5132 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005133 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005134
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005135 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5136 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5137 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5138 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005139
5140 if ( __kmp_storage_map ) {
5141 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5142 }
5143
5144 /* allocate space for arguments */
5145 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005146 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005147
5148 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5149 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5150 { // Initialize barrier data.
5151 int b;
5152 for ( b = 0; b < bs_last_barrier; ++ b ) {
5153 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005154#if USE_DEBUGGER
5155 team->t.t_bar[ b ].b_master_arrived = 0;
5156 team->t.t_bar[ b ].b_team_arrived = 0;
5157#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005158 }
5159 }
5160
5161#if OMP_40_ENABLED
5162 team->t.t_proc_bind = new_proc_bind;
5163#endif
5164
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005165#if OMPT_SUPPORT
5166 __ompt_team_assign_id(team, ompt_parallel_id);
5167 team->t.ompt_serialized_team_info = NULL;
5168#endif
5169
Jim Cownie5e8470a2013-09-27 10:38:44 +00005170 KMP_MB();
5171
5172 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5173
5174 return team;
5175}
5176
5177/* TODO implement hot-teams at all levels */
5178/* TODO implement lazy thread release on demand (disband request) */
5179
5180/* free the team. return it to the team pool. release all the threads
5181 * associated with it */
5182void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005183__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005184{
5185 int f;
5186 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5187
5188 /* verify state */
5189 KMP_DEBUG_ASSERT( root );
5190 KMP_DEBUG_ASSERT( team );
5191 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5192 KMP_DEBUG_ASSERT( team->t.t_threads );
5193
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005194 int use_hot_team = team == root->r.r_hot_team;
5195#if KMP_NESTED_HOT_TEAMS
5196 int level;
5197 kmp_hot_team_ptr_t *hot_teams;
5198 if( master ) {
5199 level = team->t.t_active_level - 1;
5200 if( master->th.th_teams_microtask ) { // in teams construct?
5201 if( master->th.th_teams_size.nteams > 1 ) {
5202 ++level; // level was not increased in teams construct for team_of_masters
5203 }
5204 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5205 master->th.th_teams_level == team->t.t_level ) {
5206 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5207 } // team->t.t_level will be increased inside parallel
5208 }
5209 hot_teams = master->th.th_hot_teams;
5210 if( level < __kmp_hot_teams_max_level ) {
5211 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5212 use_hot_team = 1;
5213 }
5214 }
5215#endif // KMP_NESTED_HOT_TEAMS
5216
Jim Cownie5e8470a2013-09-27 10:38:44 +00005217 /* team is done working */
5218 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005219 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005220 // Do not reset pointer to parent team to NULL for hot teams.
5221
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005222 /* if this is a non-hot team, release our threads */
5223 if( ! use_hot_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005224 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005225 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005226 int tt_idx;
5227 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005228 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5229 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005230 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5231 team->t.t_threads[f]->th.th_task_team = NULL;
5232 }
5233 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005234#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005235 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005236#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005237 team->t.t_task_team[tt_idx] = NULL;
5238 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005239 }
5240 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005241
5242 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005243 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005244 team->t.t_level = 0;
5245 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005246
Jim Cownie5e8470a2013-09-27 10:38:44 +00005247 /* free the worker threads */
5248 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5249 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5250 __kmp_free_thread( team->t.t_threads[ f ] );
5251 team->t.t_threads[ f ] = NULL;
5252 }
5253
Jim Cownie5e8470a2013-09-27 10:38:44 +00005254 /* put the team back in the team pool */
5255 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005256 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005257 __kmp_team_pool = (volatile kmp_team_t*) team;
5258 }
5259
5260 KMP_MB();
5261}
5262
5263
5264/* reap the team. destroy it, reclaim all its resources and free its memory */
5265kmp_team_t *
5266__kmp_reap_team( kmp_team_t *team )
5267{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005268 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005269
5270 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005271 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5272 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5273 KMP_DEBUG_ASSERT( team->t.t_threads );
5274 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005275
5276 /* TODO clean the threads that are a part of this? */
5277
5278 /* free stuff */
5279
5280 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005281 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5282 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005283 __kmp_free( team );
5284
5285 KMP_MB();
5286 return next_pool;
5287}
5288
5289//
5290// Free the thread. Don't reap it, just place it on the pool of available
5291// threads.
5292//
5293// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5294// binding for the affinity mechanism to be useful.
5295//
5296// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5297// However, we want to avoid a potential performance problem by always
5298// scanning through the list to find the correct point at which to insert
5299// the thread (potential N**2 behavior). To do this we keep track of the
5300// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5301// With single-level parallelism, threads will always be added to the tail
5302// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5303// parallelism, all bets are off and we may need to scan through the entire
5304// free list.
5305//
5306// This change also has a potentially large performance benefit, for some
5307// applications. Previously, as threads were freed from the hot team, they
5308// would be placed back on the free list in inverse order. If the hot team
5309// grew back to its original size, then the freed thread would be placed
5310// back on the hot team in reverse order. This could cause bad cache
5311// locality problems on programs where the size of the hot team regularly
5312// grew and shrunk.
5313//
5314// Now, for single-level parallelism, the OMP tid is always == gtid.
5315//
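//
// A minimal standalone sketch of the cached-insert-point scheme described above
// (illustration only -- the types and names below are simplified stand-ins, not
// the runtime's actual data structures):
//
//     typedef struct node { int gtid; struct node *next; } node_t;
//     static node_t *pool = NULL;        // list kept sorted by ascending gtid
//     static node_t *insert_pt = NULL;   // cached position of the last insert
//
//     static void pool_insert( node_t *n ) {
//         node_t **scan;
//         if ( insert_pt != NULL && insert_pt->gtid > n->gtid )
//             insert_pt = NULL;          // cache is past the new spot; rescan from the head
//         scan = ( insert_pt != NULL ) ? &insert_pt->next : &pool;
//         for ( ; *scan != NULL && (*scan)->gtid < n->gtid; scan = &(*scan)->next )
//             ;                          // 0 iterations when threads arrive in gtid order
//         n->next = *scan;               // splice in, keeping the list sorted
//         insert_pt = *scan = n;         // remember where we inserted
//     }
//
// With single-level parallelism threads are always added at the tail, so the scan
// loop does no work and each insert is O(1); nested parallelism may force a full
// rescan, which is the potential N**2 behavior mentioned above.
//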
5316void
5317__kmp_free_thread( kmp_info_t *this_th )
5318{
5319 int gtid;
5320 kmp_info_t **scan;
5321
5322 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5323 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5324
5325 KMP_DEBUG_ASSERT( this_th );
5326
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005327 // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team).
5328 int b;
5329 kmp_balign_t *balign = this_th->th.th_bar;
5330 for (b=0; b<bs_last_barrier; ++b) {
5331 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5332 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5333 balign[b].bb.team = NULL;
5334 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005335 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005336
Jim Cownie5e8470a2013-09-27 10:38:44 +00005337 /* put thread back on the free pool */
5338 TCW_PTR(this_th->th.th_team, NULL);
5339 TCW_PTR(this_th->th.th_root, NULL);
5340 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5341
5342 //
5343 // If the __kmp_thread_pool_insert_pt is already past the new insert
5344 // point, then we need to re-scan the entire list.
5345 //
5346 gtid = this_th->th.th_info.ds.ds_gtid;
5347 if ( __kmp_thread_pool_insert_pt != NULL ) {
5348 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5349 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5350 __kmp_thread_pool_insert_pt = NULL;
5351 }
5352 }
5353
5354 //
5355 // Scan down the list to find the place to insert the thread.
5356 // scan is the address of a link in the list, possibly the address of
5357 // __kmp_thread_pool itself.
5358 //
5359 // In the absence of nested parallelism, the for loop will have 0 iterations.
5360 //
5361 if ( __kmp_thread_pool_insert_pt != NULL ) {
5362 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5363 }
5364 else {
5365 scan = (kmp_info_t **)&__kmp_thread_pool;
5366 }
5367 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5368 scan = &( (*scan)->th.th_next_pool ) );
5369
5370 //
5371 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5372 // to its address.
5373 //
5374 TCW_PTR(this_th->th.th_next_pool, *scan);
5375 __kmp_thread_pool_insert_pt = *scan = this_th;
5376 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5377 || ( this_th->th.th_info.ds.ds_gtid
5378 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5379 TCW_4(this_th->th.th_in_pool, TRUE);
5380 __kmp_thread_pool_nth++;
5381
5382 TCW_4(__kmp_nth, __kmp_nth - 1);
5383
5384#ifdef KMP_ADJUST_BLOCKTIME
5385 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005386 /* Middle initialization might never have occurred */
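    /* (Rationale: __kmp_zero_bt forces an effectively zero blocktime while the process is
       oversubscribed; once __kmp_nth is back at or below __kmp_avail_proc, the check below
       drops that forcing and the user/default blocktime applies again.) */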
Jim Cownie5e8470a2013-09-27 10:38:44 +00005387 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5388 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5389 if ( __kmp_nth <= __kmp_avail_proc ) {
5390 __kmp_zero_bt = FALSE;
5391 }
5392 }
5393#endif /* KMP_ADJUST_BLOCKTIME */
5394
5395 KMP_MB();
5396}
5397
Jim Cownie5e8470a2013-09-27 10:38:44 +00005398
Jim Cownie5e8470a2013-09-27 10:38:44 +00005399/* ------------------------------------------------------------------------ */
5400
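/*
 * Overview of the worker life cycle implemented below: each worker runs
 * __kmp_launch_thread() as its top-level routine. It parks in __kmp_fork_barrier()
 * until a master releases it with a team to work on, invokes the team's microtask
 * through (*pteam)->t.t_invoke(), waits in __kmp_join_barrier(), and then loops
 * back to the fork barrier, repeating until __kmp_global.g.g_done signals shutdown.
 */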
5401void *
5402__kmp_launch_thread( kmp_info_t *this_thr )
5403{
5404 int gtid = this_thr->th.th_info.ds.ds_gtid;
5405/* void *stack_data;*/
5406 kmp_team_t *(*volatile pteam);
5407
5408 KMP_MB();
5409 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5410
5411 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005412 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005413 }
5414
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005415#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005416 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005417 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5418 this_thr->th.ompt_thread_info.wait_id = 0;
5419 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005420 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005421 __ompt_thread_begin(ompt_thread_worker, gtid);
5422 }
5423 }
5424#endif
5425
Jim Cownie5e8470a2013-09-27 10:38:44 +00005426 /* This is the place where threads wait for work */
5427 while( ! TCR_4(__kmp_global.g.g_done) ) {
5428 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5429 KMP_MB();
5430
5431 /* wait for work to do */
5432 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5433
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005434#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005435 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005436 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5437 }
5438#endif
5439
Jim Cownie5e8470a2013-09-27 10:38:44 +00005440 /* No tid yet since not part of a team */
5441 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5442
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005443#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005444 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005445 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5446 }
5447#endif
5448
Jim Cownie5e8470a2013-09-27 10:38:44 +00005449 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5450
5451 /* have we been allocated? */
5452 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005453#if OMPT_SUPPORT
5454 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005455 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005456 if (ompt_enabled) {
5457 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005458 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005459 }
5460#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005461 /* we were just woken up, so run our new task */
5462 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5463 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005464 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5465 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005466
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005467 updateHWFPControl (*pteam);
5468
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005469#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005470 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005471 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005472 // Initialize OMPT task id for implicit task.
5473 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005474 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005475 }
5476#endif
5477
Jonathan Peyton45be4502015-08-11 21:36:41 +00005478 KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005479 {
Jonathan Peyton45be4502015-08-11 21:36:41 +00005480 KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005481 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5482 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005483 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005484 }
Jonathan Peyton45be4502015-08-11 21:36:41 +00005485 KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005486 KMP_ASSERT( rc );
5487
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005488#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005489 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005490 /* no frame set while outside task */
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005491 task_info->frame.exit_runtime_frame = 0;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005492
5493 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5494 }
5495#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005496 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005497 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5498 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005499 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005500 /* join barrier after parallel region */
5501 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005502#if OMPT_SUPPORT && OMPT_TRACE
5503 if (ompt_enabled) {
5504 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005505 // don't access *pteam here: it may have already been freed
5506 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005507 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5508 my_parallel_id, task_info->task_id);
5509 }
5510 task_info->frame.exit_runtime_frame = 0;
5511 task_info->task_id = 0;
5512 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005513#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005514 }
5515 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005516 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005517
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005518#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005519 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005520 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5521 __ompt_thread_end(ompt_thread_worker, gtid);
5522 }
5523#endif
5524
Jonathan Peyton54127982015-11-04 21:37:48 +00005525 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005526 /* run the destructors for the threadprivate data for this thread */
5527 __kmp_common_destroy_gtid( gtid );
5528
5529 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5530 KMP_MB();
5531 return this_thr;
5532}
5533
5534/* ------------------------------------------------------------------------ */
5535/* ------------------------------------------------------------------------ */
5536
Jim Cownie5e8470a2013-09-27 10:38:44 +00005537void
5538__kmp_internal_end_dest( void *specific_gtid )
5539{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005540 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005541 #pragma warning( push )
5542 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5543 #endif
5544 // Make sure no significant bits are lost
5545 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005546 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005547 #pragma warning( pop )
5548 #endif
5549
5550 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5551 /* NOTE: the gtid is stored as gtid+1 in the thread-local storage;
5552 * this is because 0 is reserved for the nothing-stored case */
5553
5554 /* josh: One reason for setting the gtid specific data even when it is being
5555 destroyed by pthread is to allow gtid lookup through thread specific data
5556 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5557 that gets executed in the call to __kmp_internal_end_thread, actually
5558 gets the gtid through the thread specific data. Setting it here seems
5559 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5560 to run smoothly.
5561 todo: get rid of this after we remove the dependence on
5562 __kmp_gtid_get_specific
5563 */
5564 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5565 __kmp_gtid_set_specific( gtid );
5566 #ifdef KMP_TDATA_GTID
5567 __kmp_gtid = gtid;
5568 #endif
5569 __kmp_internal_end_thread( gtid );
5570}
5571
Jonathan Peyton99016992015-05-26 17:32:53 +00005572#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005573
5574// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005575// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005576// option in makefile.mk works fine.
5577
5578__attribute__(( destructor ))
5579void
5580__kmp_internal_end_dtor( void )
5581{
5582 __kmp_internal_end_atexit();
5583}
5584
5585void
5586__kmp_internal_end_fini( void )
5587{
5588 __kmp_internal_end_atexit();
5589}
5590
5591#endif
5592
5593/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5594void
5595__kmp_internal_end_atexit( void )
5596{
5597 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5598 /* [Windows]
5599 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5600 stat code that depends on thread specific data for gtid fails because that data becomes
5601 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5602 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5603 stat code and use __kmp_internal_end_library to cleanly shutdown the library.
5604
5605// TODO: Can some of this comment about GVS be removed?
5606 I suspect that the offending stat code is executed when the calling thread tries to
5607 clean up a dead root thread's data structures, resulting in GVS code trying to close
5608 the GVS structures for that thread, but since the stat code uses
5609 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5610 cleaning up itself instead of another thread, it gets confused. This happens because
5611 allowing a thread to unregister and cleanup another thread is a recent modification for
5612 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5613 thread may end up trying to unregister another thread only if thread death does not
5614 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5615 specific data destructor function to detect thread death. For Windows dynamic, there
5616 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5617 workaround is applicable only for Windows static stat library.
5618 */
5619 __kmp_internal_end_library( -1 );
5620 #if KMP_OS_WINDOWS
5621 __kmp_close_console();
5622 #endif
5623}
5624
5625static void
5626__kmp_reap_thread(
5627 kmp_info_t * thread,
5628 int is_root
5629) {
5630
Alp Toker8f2d3f02014-02-24 10:40:15 +00005631 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005632
5633 int gtid;
5634
5635 KMP_DEBUG_ASSERT( thread != NULL );
5636
5637 gtid = thread->th.th_info.ds.ds_gtid;
5638
5639 if ( ! is_root ) {
5640
5641 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5642 /* Assume the threads are at the fork barrier here */
5643 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5644 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005645 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5646 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005647 }; // if
5648
Jim Cownie5e8470a2013-09-27 10:38:44 +00005649 // Terminate OS thread.
5650 __kmp_reap_worker( thread );
5651
5652 //
5653 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005654 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005655 //
5656 // There is a small timing hole here - if the worker thread was
5657 // just waking up after sleeping in the pool, had reset its
5658 // th_active_in_pool flag but not decremented the global counter
5659 // __kmp_thread_pool_active_nth yet, then the global counter
5660 // might not get updated.
5661 //
5662 // Currently, this can only happen as the library is unloaded,
5663 // so there are no harmful side effects.
5664 //
5665 if ( thread->th.th_active_in_pool ) {
5666 thread->th.th_active_in_pool = FALSE;
5667 KMP_TEST_THEN_DEC32(
5668 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5669 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5670 }
5671
5672 // Decrement # of [worker] threads in the pool.
5673 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5674 --__kmp_thread_pool_nth;
5675 }; // if
5676
5677 // Free the fast memory for tasking
5678 #if USE_FAST_MEMORY
5679 __kmp_free_fast_memory( thread );
5680 #endif /* USE_FAST_MEMORY */
5681
5682 __kmp_suspend_uninitialize_thread( thread );
5683
5684 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5685 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5686
5687 -- __kmp_all_nth;
5688 // __kmp_nth was decremented when thread is added to the pool.
5689
5690#ifdef KMP_ADJUST_BLOCKTIME
5691 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005692 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005693 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5694 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5695 if ( __kmp_nth <= __kmp_avail_proc ) {
5696 __kmp_zero_bt = FALSE;
5697 }
5698 }
5699#endif /* KMP_ADJUST_BLOCKTIME */
5700
5701 /* free the memory being used */
5702 if( __kmp_env_consistency_check ) {
5703 if ( thread->th.th_cons ) {
5704 __kmp_free_cons_stack( thread->th.th_cons );
5705 thread->th.th_cons = NULL;
5706 }; // if
5707 }
5708
5709 if ( thread->th.th_pri_common != NULL ) {
5710 __kmp_free( thread->th.th_pri_common );
5711 thread->th.th_pri_common = NULL;
5712 }; // if
5713
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005714 if (thread->th.th_task_state_memo_stack != NULL) {
5715 __kmp_free(thread->th.th_task_state_memo_stack);
5716 thread->th.th_task_state_memo_stack = NULL;
5717 }
5718
Jim Cownie5e8470a2013-09-27 10:38:44 +00005719 #if KMP_USE_BGET
5720 if ( thread->th.th_local.bget_data != NULL ) {
5721 __kmp_finalize_bget( thread );
5722 }; // if
5723 #endif
5724
Alp Toker98758b02014-03-02 04:12:06 +00005725#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005726 if ( thread->th.th_affin_mask != NULL ) {
5727 KMP_CPU_FREE( thread->th.th_affin_mask );
5728 thread->th.th_affin_mask = NULL;
5729 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005730#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005731
5732 __kmp_reap_team( thread->th.th_serial_team );
5733 thread->th.th_serial_team = NULL;
5734 __kmp_free( thread );
5735
5736 KMP_MB();
5737
5738} // __kmp_reap_thread
5739
5740static void
5741__kmp_internal_end(void)
5742{
5743 int i;
5744
5745 /* First, unregister the library */
5746 __kmp_unregister_library();
5747
5748 #if KMP_OS_WINDOWS
5749 /* In Win static library, we can't tell when a root actually dies, so we
5750 reclaim the data structures for any root threads that have died but not
5751 unregistered themselves, in order to shut down cleanly.
5752 In Win dynamic library we also can't tell when a thread dies.
5753 */
5754 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5755 #endif
5756
5757 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5758 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005759 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005760 break;
5761 KMP_MB(); /* Flush all pending memory write invalidates. */
5762 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5763
5764 if ( i < __kmp_threads_capacity ) {
5765 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5766 KMP_MB(); /* Flush all pending memory write invalidates. */
5767
5768 //
5769 // Need to check that monitor was initialized before reaping it.
5770 // If we are called from __kmp_atfork_child (which sets
5771 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5772 // contain valid data, but it is only valid in the parent process,
5773 // not the child.
5774 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005775 // New behavior (201008): instead of keying off of the flag
5776 // __kmp_init_parallel, the monitor thread creation is keyed off
5777 // of the new flag __kmp_init_monitor.
5778 //
5779 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5780 if ( TCR_4( __kmp_init_monitor ) ) {
5781 __kmp_reap_monitor( & __kmp_monitor );
5782 TCW_4( __kmp_init_monitor, 0 );
5783 }
5784 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5785 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
5786 } else {
5787 /* TODO move this to cleanup code */
5788 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005789 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005790 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5791 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005792// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005793 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005794 }
5795 }
5796 #endif
5797
5798 KMP_MB();
5799
5800 // Reap the worker threads.
5801 // This is valid for now, but be careful if threads are reaped sooner.
5802 while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool.
5803 // Get the next thread from the pool.
5804 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5805 __kmp_thread_pool = thread->th.th_next_pool;
5806 // Reap it.
5807 thread->th.th_next_pool = NULL;
5808 thread->th.th_in_pool = FALSE;
5809 __kmp_reap_thread( thread, 0 );
5810 }; // while
5811 __kmp_thread_pool_insert_pt = NULL;
5812
5813 // Reap teams.
5814 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
5815 // Get the next team from the pool.
5816 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5817 __kmp_team_pool = team->t.t_next_pool;
5818 // Reap it.
5819 team->t.t_next_pool = NULL;
5820 __kmp_reap_team( team );
5821 }; // while
5822
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005823 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005824
5825 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5826 // TBD: Add some checking...
5827 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5828 }
5829
5830 /* Make sure all threadprivate destructors get run by joining with all worker
5831 threads before resetting this flag */
5832 TCW_SYNC_4(__kmp_init_common, FALSE);
5833
5834 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5835 KMP_MB();
5836
5837 //
5838 // See note above: One of the possible fixes for CQ138434 / CQ140126
5839 //
5840 // FIXME: push both code fragments down and CSE them?
5841 // push them into __kmp_cleanup() ?
5842 //
5843 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5844 if ( TCR_4( __kmp_init_monitor ) ) {
5845 __kmp_reap_monitor( & __kmp_monitor );
5846 TCW_4( __kmp_init_monitor, 0 );
5847 }
5848 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5849 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
5850
5851 } /* else !__kmp_global.t_active */
5852 TCW_4(__kmp_init_gtid, FALSE);
5853 KMP_MB(); /* Flush all pending memory write invalidates. */
5854
Jim Cownie5e8470a2013-09-27 10:38:44 +00005855 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005856#if OMPT_SUPPORT
5857 ompt_fini();
5858#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005859}
5860
5861void
5862__kmp_internal_end_library( int gtid_req )
5863{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005864 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5865 /* this shouldn't be a race condition because __kmp_internal_end() is the
5866 * only place to clear __kmp_serial_init */
5867 /* we'll check this later too, after we get the lock */
5868 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5869 // because the next check will work in any case.
5870 if( __kmp_global.g.g_abort ) {
5871 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5872 /* TODO abort? */
5873 return;
5874 }
5875 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5876 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5877 return;
5878 }
5879
5880
5881 KMP_MB(); /* Flush all pending memory write invalidates. */
5882
5883 /* find out who we are and what we should do */
5884 {
5885 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5886 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5887 if( gtid == KMP_GTID_SHUTDOWN ) {
5888 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5889 return;
5890 } else if( gtid == KMP_GTID_MONITOR ) {
5891 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5892 return;
5893 } else if( gtid == KMP_GTID_DNE ) {
5894 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5895 /* we don't know who we are, but we may still shutdown the library */
5896 } else if( KMP_UBER_GTID( gtid )) {
5897 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005898 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005899 __kmp_global.g.g_abort = -1;
5900 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5901 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5902 return;
5903 } else {
5904 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5905 __kmp_unregister_root_current_thread( gtid );
5906 }
5907 } else {
5908 /* worker threads may call this function through the atexit handler, if they call exit() */
5909 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5910 TODO: do a thorough shutdown instead
5911 */
5912 #ifdef DUMP_DEBUG_ON_EXIT
5913 if ( __kmp_debug_buf )
5914 __kmp_dump_debug_buffer( );
5915 #endif
5916 return;
5917 }
5918 }
5919 /* synchronize the termination process */
5920 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5921
5922 /* have we already finished */
5923 if( __kmp_global.g.g_abort ) {
5924 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5925 /* TODO abort? */
5926 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5927 return;
5928 }
5929 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5930 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5931 return;
5932 }
5933
5934 /* We need this lock to enforce mutex between this reading of
5935 __kmp_threads_capacity and the writing by __kmp_register_root.
5936 Alternatively, we can use a counter of roots that is
5937 atomically updated by __kmp_get_global_thread_id_reg,
5938 __kmp_do_serial_initialize and __kmp_internal_end_*.
5939 */
5940 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5941
5942 /* now we can safely conduct the actual termination */
5943 __kmp_internal_end();
5944
5945 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5946 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5947
5948 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
5949
5950 #ifdef DUMP_DEBUG_ON_EXIT
5951 if ( __kmp_debug_buf )
5952 __kmp_dump_debug_buffer();
5953 #endif
5954
5955 #if KMP_OS_WINDOWS
5956 __kmp_close_console();
5957 #endif
5958
5959 __kmp_fini_allocator();
5960
5961} // __kmp_internal_end_library
5962
5963void
5964__kmp_internal_end_thread( int gtid_req )
5965{
5966 int i;
5967
5968 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5969 /* this shouldn't be a race condition because __kmp_internal_end() is the
5970 * only place to clear __kmp_serial_init */
5971 /* we'll check this later too, after we get the lock */
5972 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5973 // because the next check will work in any case.
5974 if( __kmp_global.g.g_abort ) {
5975 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
5976 /* TODO abort? */
5977 return;
5978 }
5979 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5980 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
5981 return;
5982 }
5983
5984 KMP_MB(); /* Flush all pending memory write invalidates. */
5985
5986 /* find out who we are and what we should do */
5987 {
5988 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5989 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
5990 if( gtid == KMP_GTID_SHUTDOWN ) {
5991 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
5992 return;
5993 } else if( gtid == KMP_GTID_MONITOR ) {
5994 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
5995 return;
5996 } else if( gtid == KMP_GTID_DNE ) {
5997 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
5998 return;
5999 /* we don't know who we are */
6000 } else if( KMP_UBER_GTID( gtid )) {
6001 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006002 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006003 __kmp_global.g.g_abort = -1;
6004 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6005 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6006 return;
6007 } else {
6008 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6009 __kmp_unregister_root_current_thread( gtid );
6010 }
6011 } else {
6012 /* just a worker thread, let's leave */
6013 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6014
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006015 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006016 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006017 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006018
6019 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6020 return;
6021 }
6022 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006023 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006024 // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber thread,
6025 // because it is better to shut down later, in the library destructor.
6026 // The reason of this change is performance problem when non-openmp thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006027 // in a loop forks and joins many openmp threads. We can save a lot of time
6028 // keeping worker threads alive until the program shutdown.
6029 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6030 // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006031 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006032 return;
6033 #endif
6034 /* synchronize the termination process */
6035 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6036
6037 /* have we already finished */
6038 if( __kmp_global.g.g_abort ) {
6039 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6040 /* TODO abort? */
6041 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6042 return;
6043 }
6044 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6045 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6046 return;
6047 }
6048
6049 /* We need this lock to enforce mutex between this reading of
6050 __kmp_threads_capacity and the writing by __kmp_register_root.
6051 Alternatively, we can use a counter of roots that is
6052 atomically updated by __kmp_get_global_thread_id_reg,
6053 __kmp_do_serial_initialize and __kmp_internal_end_*.
6054 */
6055
6056 /* should we finish the run-time? are all siblings done? */
6057 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6058
6059 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6060 if ( KMP_UBER_GTID( i ) ) {
6061 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6062 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6063 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6064 return;
6065 };
6066 }
6067
6068 /* now we can safely conduct the actual termination */
6069
6070 __kmp_internal_end();
6071
6072 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6073 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6074
Jim Cownie77c2a632014-09-03 11:34:33 +00006075 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006076
6077 #ifdef DUMP_DEBUG_ON_EXIT
6078 if ( __kmp_debug_buf )
6079 __kmp_dump_debug_buffer();
6080 #endif
6081} // __kmp_internal_end_thread
6082
6083// -------------------------------------------------------------------------------------------------
6084// Library registration stuff.
6085
6086static long __kmp_registration_flag = 0;
6087 // Random value used to indicate library initialization.
6088static char * __kmp_registration_str = NULL;
6089 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6090
6091
6092static inline
6093char *
6094__kmp_reg_status_name() {
6095 /*
6096 On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
6097 If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
6098 the registered_lib_env env var cannot be found, because the name will contain a different pid.
6099 */
6100 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6101} // __kmp_reg_status_name
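/*
    Illustration of the registration protocol implemented below (the concrete values
    here are hypothetical): for a process with pid 12345 the variable might look like

        __KMP_REGISTERED_LIB_12345=0x7f33d40831c0-0xcafe5a17-libomp.so

    i.e. "<address of __kmp_registration_flag>-<flag value>-<KMP_LIBRARY_FILE>".
    A second copy of the runtime loaded into the same process finds the variable
    already set, parses the address and value back out of it, and checks whether that
    address is still mapped and still holds the recorded value: if so, another live
    copy is registered (fatal unless KMP_DUPLICATE_LIB_OK is set); if not, the
    previous owner is dead and the variable is reclaimed.
*/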
6102
6103
6104void
6105__kmp_register_library_startup(
6106 void
6107) {
6108
6109 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6110 int done = 0;
6111 union {
6112 double dtime;
6113 long ltime;
6114 } time;
6115 #if KMP_OS_WINDOWS
6116 __kmp_initialize_system_tick();
6117 #endif
6118 __kmp_read_system_time( & time.dtime );
6119 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6120 __kmp_registration_str =
6121 __kmp_str_format(
6122 "%p-%lx-%s",
6123 & __kmp_registration_flag,
6124 __kmp_registration_flag,
6125 KMP_LIBRARY_FILE
6126 );
6127
6128 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6129
6130 while ( ! done ) {
6131
6132 char * value = NULL; // Actual value of the environment variable.
6133
6134 // Set the environment variable, but do not overwrite it if it already exists.
6135 __kmp_env_set( name, __kmp_registration_str, 0 );
6136 // Check that the variable was actually written.
6137 value = __kmp_env_get( name );
6138 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6139
6140 done = 1; // Ok, environment variable set successfully, exit the loop.
6141
6142 } else {
6143
6144 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6145 // Check whether it is alive or dead.
6146 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6147 char * tail = value;
6148 char * flag_addr_str = NULL;
6149 char * flag_val_str = NULL;
6150 char const * file_name = NULL;
6151 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6152 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6153 file_name = tail;
6154 if ( tail != NULL ) {
6155 long * flag_addr = 0;
6156 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006157 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6158 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006159 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6160 // First, check whether the environment-encoded address is mapped into the address space.
6161 // If so, dereference it to see if it still has the right value.
6162
6163 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6164 neighbor = 1;
6165 } else {
6166 // If not, then we know the other copy of the library is no longer running.
6167 neighbor = 2;
6168 }; // if
6169 }; // if
6170 }; // if
6171 switch ( neighbor ) {
6172 case 0 : // Cannot parse environment variable -- neighbor status unknown.
6173 // Assume it is the incompatible format of a future version of the library.
6174 // Assume the other library is alive.
6175 // WARN( ... ); // TODO: Issue a warning.
6176 file_name = "unknown library";
6177 // Attention! Falling through to the next case. That's intentional.
6178 case 1 : { // Neighbor is alive.
6179 // Check it is allowed.
6180 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6181 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6182 // That's not allowed. Issue fatal error.
6183 __kmp_msg(
6184 kmp_ms_fatal,
6185 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6186 KMP_HNT( DuplicateLibrary ),
6187 __kmp_msg_null
6188 );
6189 }; // if
6190 KMP_INTERNAL_FREE( duplicate_ok );
6191 __kmp_duplicate_library_ok = 1;
6192 done = 1; // Exit the loop.
6193 } break;
6194 case 2 : { // Neighbor is dead.
6195 // Clear the variable and try to register library again.
6196 __kmp_env_unset( name );
6197 } break;
6198 default : {
6199 KMP_DEBUG_ASSERT( 0 );
6200 } break;
6201 }; // switch
6202
6203 }; // if
6204 KMP_INTERNAL_FREE( (void *) value );
6205
6206 }; // while
6207 KMP_INTERNAL_FREE( (void *) name );
6208
6209} // func __kmp_register_library_startup
6210
6211
6212void
6213__kmp_unregister_library( void ) {
6214
6215 char * name = __kmp_reg_status_name();
6216 char * value = __kmp_env_get( name );
6217
6218 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6219 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6220 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6221 // Ok, this is our variable. Delete it.
6222 __kmp_env_unset( name );
6223 }; // if
6224
6225 KMP_INTERNAL_FREE( __kmp_registration_str );
6226 KMP_INTERNAL_FREE( value );
6227 KMP_INTERNAL_FREE( name );
6228
6229 __kmp_registration_flag = 0;
6230 __kmp_registration_str = NULL;
6231
6232} // __kmp_unregister_library
6233
6234
6235// End of Library registration stuff.
6236// -------------------------------------------------------------------------------------------------
6237
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006238#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6239
6240static void __kmp_check_mic_type()
6241{
6242 kmp_cpuid_t cpuid_state = {0};
6243 kmp_cpuid_t * cs_p = &cpuid_state;
Jonathan Peyton7be075332015-06-22 15:53:50 +00006244 __kmp_x86_cpuid(1, 0, cs_p);
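    // (CPUID leaf 1 returns stepping in EAX[3:0], model in EAX[7:4], family in EAX[11:8]
    // and extended model in EAX[19:16]; the masks below therefore select the family/model
    // fields -- family 0x0B, model 1 for KNC and family 6, model 0x57 for KNL.)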
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006245 // We don't support mic1 at the moment
6246 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6247 __kmp_mic_type = mic2;
6248 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6249 __kmp_mic_type = mic3;
6250 } else {
6251 __kmp_mic_type = non_mic;
6252 }
6253}
6254
6255#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
6256
Jim Cownie5e8470a2013-09-27 10:38:44 +00006257static void
6258__kmp_do_serial_initialize( void )
6259{
6260 int i, gtid;
6261 int size;
6262
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006263 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006264
6265 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6266 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6267 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6268 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6269 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6270
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006271#if OMPT_SUPPORT
6272 ompt_pre_init();
6273#endif
6274
Jim Cownie5e8470a2013-09-27 10:38:44 +00006275 __kmp_validate_locks();
6276
6277 /* Initialize internal memory allocator */
6278 __kmp_init_allocator();
6279
6280 /* Register the library startup via an environment variable
6281 and check to see whether another copy of the library is already
6282 registered. */
6283
6284 __kmp_register_library_startup( );
6285
6286 /* TODO reinitialization of library */
6287 if( TCR_4(__kmp_global.g.g_done) ) {
6288 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6289 }
6290
6291 __kmp_global.g.g_abort = 0;
6292 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6293
6294 /* initialize the locks */
6295#if KMP_USE_ADAPTIVE_LOCKS
6296#if KMP_DEBUG_ADAPTIVE_LOCKS
6297 __kmp_init_speculative_stats();
6298#endif
6299#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006300#if KMP_STATS_ENABLED
6301 __kmp_init_tas_lock( & __kmp_stats_lock );
6302#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006303 __kmp_init_lock( & __kmp_global_lock );
6304 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6305 __kmp_init_lock( & __kmp_debug_lock );
6306 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6307 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6308 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6309 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6310 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6311 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6312 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6313 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6314 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6315 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6316 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6317 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6318 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6319 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6320 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6321 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6322 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6323
6324 /* conduct initialization and initial setup of configuration */
6325
6326 __kmp_runtime_initialize();
6327
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006328#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6329 __kmp_check_mic_type();
6330#endif
6331
Jim Cownie5e8470a2013-09-27 10:38:44 +00006332 // Some global variable initialization moved here from kmp_env_initialize()
6333#ifdef KMP_DEBUG
6334 kmp_diag = 0;
6335#endif
6336 __kmp_abort_delay = 0;
6337
6338 // From __kmp_init_dflt_team_nth()
6339 /* assume the entire machine will be used */
6340 __kmp_dflt_team_nth_ub = __kmp_xproc;
6341 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6342 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6343 }
6344 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6345 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6346 }
6347 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006348
6349 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6350 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6351 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6352 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6353 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6354 __kmp_library = library_throughput;
6355 // From KMP_SCHEDULE initialization
6356 __kmp_static = kmp_sch_static_balanced;
6357 // AC: do not use analytical here, because it is non-monotonous
6358 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006359 //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006360 // Barrier initialization. Moved here from the barrier branch bit control and barrier method
 6361 // control parts of __kmp_env_initialize().
6362 #if KMP_FAST_REDUCTION_BARRIER
6363 #define kmp_reduction_barrier_gather_bb ((int)1)
6364 #define kmp_reduction_barrier_release_bb ((int)1)
6365 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6366 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6367 #endif // KMP_FAST_REDUCTION_BARRIER
6368 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6369 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6370 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6371 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6372 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6373 #if KMP_FAST_REDUCTION_BARRIER
6374 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6375 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6376 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6377 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6378 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6379 }
6380 #endif // KMP_FAST_REDUCTION_BARRIER
6381 }
6382 #if KMP_FAST_REDUCTION_BARRIER
6383 #undef kmp_reduction_barrier_release_pat
6384 #undef kmp_reduction_barrier_gather_pat
6385 #undef kmp_reduction_barrier_release_bb
6386 #undef kmp_reduction_barrier_gather_bb
6387 #endif // KMP_FAST_REDUCTION_BARRIER
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006388#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006389 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006390 // AC: plain=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006391 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006392 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6393 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6394 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6395 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006396#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006397 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006398 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6399 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6400 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006401#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006402#endif
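    /*
       Hedged usage note (assumes the documented KMP_*_BARRIER[_PATTERN] settings):
       the values set above are only compiled-in defaults; __kmp_env_initialize()
       below may still override them per barrier type, e.g.

           KMP_FORKJOIN_BARRIER="2,1"                 # gather,release branch bits
           KMP_FORKJOIN_BARRIER_PATTERN="hyper,hyper"

       A branch-bit value of b corresponds to a fan-out of 2^b in the tree/hyper
       barrier algorithms.
    */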
Jim Cownie5e8470a2013-09-27 10:38:44 +00006403
6404 // From KMP_CHECKS initialization
6405#ifdef KMP_DEBUG
6406 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6407#else
6408 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6409#endif
6410
6411 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6412 __kmp_foreign_tp = TRUE;
6413
6414 __kmp_global.g.g_dynamic = FALSE;
6415 __kmp_global.g.g_dynamic_mode = dynamic_default;
6416
6417 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006418
Jim Cownie5e8470a2013-09-27 10:38:44 +00006419 // Print all messages in message catalog for testing purposes.
6420 #ifdef KMP_DEBUG
6421 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6422 if ( __kmp_str_match_true( val ) ) {
6423 kmp_str_buf_t buffer;
6424 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006425 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006426 __kmp_printf( "%s", buffer.str );
6427 __kmp_str_buf_free( & buffer );
6428 }; // if
6429 __kmp_env_free( & val );
6430 #endif
6431
Jim Cownie181b4bb2013-12-23 17:28:57 +00006432 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006433 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6434 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6435
Jim Cownie5e8470a2013-09-27 10:38:44 +00006436 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6437 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6438 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6439 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6440 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6441 __kmp_thread_pool = NULL;
6442 __kmp_thread_pool_insert_pt = NULL;
6443 __kmp_team_pool = NULL;
6444
6445 /* Allocate all of the variable sized records */
6446 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6447 /* Since allocation is cache-aligned, just add extra padding at the end */
6448 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6449 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6450 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6451
6452 /* init thread counts */
6453 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
 6454 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something went wrong during termination.
6455 __kmp_all_nth = 0;
6456 __kmp_nth = 0;
6457
6458 /* setup the uber master thread and hierarchy */
6459 gtid = __kmp_register_root( TRUE );
6460 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6461 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6462 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6463
6464 KMP_MB(); /* Flush all pending memory write invalidates. */
6465
6466 __kmp_common_initialize();
6467
6468 #if KMP_OS_UNIX
6469 /* invoke the child fork handler */
6470 __kmp_register_atfork();
6471 #endif
6472
Jonathan Peyton99016992015-05-26 17:32:53 +00006473 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006474 {
6475 /* Invoke the exit handler when the program finishes, only for static library.
6476 For dynamic library, we already have _fini and DllMain.
6477 */
6478 int rc = atexit( __kmp_internal_end_atexit );
6479 if ( rc != 0 ) {
6480 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6481 }; // if
6482 }
6483 #endif
6484
6485 #if KMP_HANDLE_SIGNALS
6486 #if KMP_OS_UNIX
6487 /* NOTE: make sure that this is called before the user installs
6488 * their own signal handlers so that the user handlers
 6489 * are called first. This way they can return false,
6490 * not call our handler, avoid terminating the library,
6491 * and continue execution where they left off. */
6492 __kmp_install_signals( FALSE );
6493 #endif /* KMP_OS_UNIX */
6494 #if KMP_OS_WINDOWS
6495 __kmp_install_signals( TRUE );
6496 #endif /* KMP_OS_WINDOWS */
6497 #endif
6498
6499 /* we have finished the serial initialization */
6500 __kmp_init_counter ++;
6501
6502 __kmp_init_serial = TRUE;
6503
6504 if (__kmp_settings) {
6505 __kmp_env_print();
6506 }
6507
6508#if OMP_40_ENABLED
6509 if (__kmp_display_env || __kmp_display_env_verbose) {
6510 __kmp_env_print_2();
6511 }
6512#endif // OMP_40_ENABLED
6513
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006514#if OMPT_SUPPORT
6515 ompt_post_init();
6516#endif
6517
Jim Cownie5e8470a2013-09-27 10:38:44 +00006518 KMP_MB();
6519
6520 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6521}
6522
6523void
6524__kmp_serial_initialize( void )
6525{
6526 if ( __kmp_init_serial ) {
6527 return;
6528 }
6529 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6530 if ( __kmp_init_serial ) {
6531 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6532 return;
6533 }
6534 __kmp_do_serial_initialize();
6535 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6536}
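/*
   Hedged sketch (illustrative names, not runtime symbols) of the double-checked
   pattern shared by __kmp_serial_initialize(), __kmp_middle_initialize() and
   __kmp_parallel_initialize() below: an unlocked fast-path test, then a re-test
   under __kmp_initz_lock so the real work runs exactly once.

       if ( initialized ) return;          // fast path, no lock taken
       acquire( &init_lock );
       if ( ! initialized )                // re-check while holding the lock
           do_initialize();                //   sets 'initialized' as its last step
       release( &init_lock );
*/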
6537
6538static void
6539__kmp_do_middle_initialize( void )
6540{
6541 int i, j;
6542 int prev_dflt_team_nth;
6543
6544 if( !__kmp_init_serial ) {
6545 __kmp_do_serial_initialize();
6546 }
6547
6548 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6549
6550 //
6551 // Save the previous value for the __kmp_dflt_team_nth so that
6552 // we can avoid some reinitialization if it hasn't changed.
6553 //
6554 prev_dflt_team_nth = __kmp_dflt_team_nth;
6555
Alp Toker98758b02014-03-02 04:12:06 +00006556#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006557 //
6558 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6559 // number of cores on the machine.
6560 //
6561 __kmp_affinity_initialize();
6562
6563 //
6564 // Run through the __kmp_threads array and set the affinity mask
6565 // for each root thread that is currently registered with the RTL.
6566 //
6567 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6568 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6569 __kmp_affinity_set_init_mask( i, TRUE );
6570 }
6571 }
Alp Toker98758b02014-03-02 04:12:06 +00006572#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006573
6574 KMP_ASSERT( __kmp_xproc > 0 );
6575 if ( __kmp_avail_proc == 0 ) {
6576 __kmp_avail_proc = __kmp_xproc;
6577 }
6578
6579 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
6580 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006581 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006582 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6583 j++;
6584 }
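    /*
       Hedged example of the case handled above (values illustrative): with

           $ OMP_NUM_THREADS=",,2,3" ./app

       nesting levels 0 and 1 are left empty by the user, so the loop fills them
       with __kmp_avail_proc (which also becomes the default team size), while
       levels 2 and 3 keep the user-supplied values 2 and 3.
    */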
6585
6586 if ( __kmp_dflt_team_nth == 0 ) {
6587#ifdef KMP_DFLT_NTH_CORES
6588 //
6589 // Default #threads = #cores
6590 //
6591 __kmp_dflt_team_nth = __kmp_ncores;
6592 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6593 __kmp_dflt_team_nth ) );
6594#else
6595 //
6596 // Default #threads = #available OS procs
6597 //
6598 __kmp_dflt_team_nth = __kmp_avail_proc;
6599 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6600 __kmp_dflt_team_nth ) );
6601#endif /* KMP_DFLT_NTH_CORES */
6602 }
6603
6604 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6605 __kmp_dflt_team_nth = KMP_MIN_NTH;
6606 }
6607 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6608 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6609 }
6610
6611 //
6612 // There's no harm in continuing if the following check fails,
6613 // but it indicates an error in the previous logic.
6614 //
6615 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6616
6617 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6618 //
6619 // Run through the __kmp_threads array and set the num threads icv
6620 // for each root thread that is currently registered with the RTL
6621 // (which has not already explicitly set its nthreads-var with a
6622 // call to omp_set_num_threads()).
6623 //
6624 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6625 kmp_info_t *thread = __kmp_threads[ i ];
6626 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006627 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006628
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006629 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006630 }
6631 }
6632 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6633 __kmp_dflt_team_nth) );
6634
6635#ifdef KMP_ADJUST_BLOCKTIME
6636 /* Adjust blocktime to zero if necessary */
6637 /* now that __kmp_avail_proc is set */
6638 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6639 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6640 if ( __kmp_nth > __kmp_avail_proc ) {
6641 __kmp_zero_bt = TRUE;
6642 }
6643 }
6644#endif /* KMP_ADJUST_BLOCKTIME */
6645
6646 /* we have finished middle initialization */
6647 TCW_SYNC_4(__kmp_init_middle, TRUE);
6648
6649 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6650}
6651
6652void
6653__kmp_middle_initialize( void )
6654{
6655 if ( __kmp_init_middle ) {
6656 return;
6657 }
6658 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6659 if ( __kmp_init_middle ) {
6660 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6661 return;
6662 }
6663 __kmp_do_middle_initialize();
6664 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6665}
6666
6667void
6668__kmp_parallel_initialize( void )
6669{
6670 int gtid = __kmp_entry_gtid(); // this might be a new root
6671
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006672 /* synchronize parallel initialization (for sibling) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006673 if( TCR_4(__kmp_init_parallel) ) return;
6674 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6675 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6676
6677 /* TODO reinitialization after we have already shut down */
6678 if( TCR_4(__kmp_global.g.g_done) ) {
6679 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6680 __kmp_infinite_loop();
6681 }
6682
6683 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6684 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6685 */
6686 if( !__kmp_init_middle ) {
6687 __kmp_do_middle_initialize();
6688 }
6689
6690 /* begin initialization */
6691 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6692 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6693
6694#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6695 //
6696 // Save the FP control regs.
6697 // Worker threads will set theirs to these values at thread startup.
6698 //
6699 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6700 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6701 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6702#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6703
6704#if KMP_OS_UNIX
6705# if KMP_HANDLE_SIGNALS
6706 /* must be after __kmp_serial_initialize */
6707 __kmp_install_signals( TRUE );
6708# endif
6709#endif
6710
6711 __kmp_suspend_initialize();
6712
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006713#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006714 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6715 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6716 }
6717#else
6718 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6719 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6720 }
6721#endif
6722
6723 if ( __kmp_version ) {
6724 __kmp_print_version_2();
6725 }
6726
Jim Cownie5e8470a2013-09-27 10:38:44 +00006727 /* we have finished parallel initialization */
6728 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6729
6730 KMP_MB();
6731 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6732
6733 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6734}
6735
6736
6737/* ------------------------------------------------------------------------ */
6738
6739void
6740__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6741 kmp_team_t *team )
6742{
6743 kmp_disp_t *dispatch;
6744
6745 KMP_MB();
6746
6747 /* none of the threads have encountered any constructs, yet. */
6748 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006749#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006750 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006751#endif /* KMP_CACHE_MANAGE */
6752 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6753 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006754 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6755 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006756
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006757 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006758#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00006759 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6760#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006761 if( __kmp_env_consistency_check )
6762 __kmp_push_parallel( gtid, team->t.t_ident );
6763
6764 KMP_MB(); /* Flush all pending memory write invalidates. */
6765}
6766
6767void
6768__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6769 kmp_team_t *team )
6770{
6771 if( __kmp_env_consistency_check )
6772 __kmp_pop_parallel( gtid, team->t.t_ident );
6773}
6774
6775int
6776__kmp_invoke_task_func( int gtid )
6777{
6778 int rc;
6779 int tid = __kmp_tid_from_gtid( gtid );
6780 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006781 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006782
6783 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6784#if USE_ITT_BUILD
6785 if ( __itt_stack_caller_create_ptr ) {
6786 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6787 }
6788#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006789#if INCLUDE_SSC_MARKS
6790 SSC_MARK_INVOKING();
6791#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006792
6793#if OMPT_SUPPORT
6794 void *dummy;
6795 void **exit_runtime_p;
6796 ompt_task_id_t my_task_id;
6797 ompt_parallel_id_t my_parallel_id;
6798
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006799 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006800 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6801 ompt_task_info.frame.exit_runtime_frame);
6802 } else {
6803 exit_runtime_p = &dummy;
6804 }
6805
6806#if OMPT_TRACE
6807 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6808 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006809 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006810 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6811 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6812 my_parallel_id, my_task_id);
6813 }
6814#endif
6815#endif
6816
Jonathan Peyton45be4502015-08-11 21:36:41 +00006817 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006818 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6819 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006820 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6821 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006822#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006823 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006824#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006825 );
6826 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006827
Jim Cownie5e8470a2013-09-27 10:38:44 +00006828#if USE_ITT_BUILD
6829 if ( __itt_stack_caller_create_ptr ) {
6830 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6831 }
6832#endif /* USE_ITT_BUILD */
6833 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6834
6835 return rc;
6836}
6837
6838#if OMP_40_ENABLED
6839void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006840__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006841{
 6842 // This routine is called by all master threads in a teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006843 kmp_info_t *thr = __kmp_threads[ gtid ];
6844 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006845 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006846 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6847 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6848 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006849 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006850 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006851 // Launch the league of teams now, but do not let the workers execute
 6852 // (they wait on the fork barrier until the next parallel region)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006853#if INCLUDE_SSC_MARKS
6854 SSC_MARK_FORKING();
6855#endif
6856 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006857 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006858#if OMPT_SUPPORT
6859 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6860#endif
6861 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006862 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6863 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006864#if INCLUDE_SSC_MARKS
6865 SSC_MARK_JOINING();
6866#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006867
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006868 // AC: last parameter "1" eliminates join barrier which won't work because
6869 // worker threads are in a fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006870 __kmp_join_call( loc, gtid
6871#if OMPT_SUPPORT
6872 , fork_context_intel
6873#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006874 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006875}
6876
6877int
6878__kmp_invoke_teams_master( int gtid )
6879{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006880 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6881 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006882 #if KMP_DEBUG
6883 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6884 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6885 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006886 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6887 __kmp_teams_master( gtid );
6888 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006889 return 1;
6890}
6891#endif /* OMP_40_ENABLED */
6892
6893/* this sets the requested number of threads for the next parallel region
6894 * encountered by this team */
6895/* since this should be enclosed in the forkjoin critical section it
 6896 * should avoid race conditions with asymmetrical nested parallelism */
6897
6898void
6899__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6900{
6901 kmp_info_t *thr = __kmp_threads[gtid];
6902
6903 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006904 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006905}
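/*
   Hedged sketch of how __kmp_push_num_threads() is typically reached (the exact
   lowering is compiler-dependent): for

       #pragma omp parallel num_threads(4)
       { ... }

   the compiler usually emits something along the lines of

       __kmpc_push_num_threads( &loc, __kmpc_global_thread_num( &loc ), 4 );
       __kmpc_fork_call( &loc, nargs, outlined_body, ... );

   where __kmpc_push_num_threads() forwards the request to the routine above and
   the stored value is consumed (and reset) by the subsequent fork.
*/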
6906
6907#if OMP_40_ENABLED
6908
6909/* this sets the requested number of teams for the teams region and/or
6910 * the number of threads for the next parallel region encountered */
6911void
6912__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6913{
6914 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006915 KMP_DEBUG_ASSERT(num_teams >= 0);
6916 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006917
6918 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006919 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006920 if( num_teams > __kmp_max_nth ) { // if too many teams requested?
6921 if ( !__kmp_reserve_warn ) {
6922 __kmp_reserve_warn = 1;
6923 __kmp_msg(
6924 kmp_ms_warning,
6925 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6926 KMP_HNT( Unset_ALL_THREADS ),
6927 __kmp_msg_null
6928 );
6929 }
6930 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006931 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006932 // Set number of teams (number of threads in the outer "parallel" of the teams)
6933 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6934
6935 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006936 if( num_threads == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006937 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006938 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006939 num_threads = __kmp_avail_proc / num_teams;
6940 if( num_teams * num_threads > __kmp_max_nth ) {
 6941 // adjust num_threads without a warning since it is not a user setting
6942 num_threads = __kmp_max_nth / num_teams;
6943 }
6944 } else {
6945 if( num_teams * num_threads > __kmp_max_nth ) {
6946 int new_threads = __kmp_max_nth / num_teams;
6947 if ( !__kmp_reserve_warn ) { // user asked for too many threads
6948 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
6949 __kmp_msg(
6950 kmp_ms_warning,
6951 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6952 KMP_HNT( Unset_ALL_THREADS ),
6953 __kmp_msg_null
6954 );
6955 }
6956 num_threads = new_threads;
6957 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006958 }
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006959 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006960}
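/*
   Hedged sketch (the exact lowering is compiler-dependent): for

       #pragma omp teams num_teams(8) thread_limit(4)
       { ... }

   the compiler typically emits something like

       __kmpc_push_num_teams( &loc, gtid, 8, 4 );
       __kmpc_fork_teams( &loc, nargs, outlined_teams_body, ... );

   so the clamping against __kmp_max_nth performed above happens before the
   teams region is actually forked.
*/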
6961
6962
6963//
6964// Set the proc_bind var to use in the following parallel region.
6965//
6966void
6967__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
6968{
6969 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006970 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006971}
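/*
   Hedged sketch: a proc_bind clause such as

       #pragma omp parallel proc_bind(close) num_threads(4)
       { ... }

   is typically lowered to a call like

       __kmpc_push_proc_bind( &loc, gtid, proc_bind_close );

   which stores the value via the routine above for the next fork to consume.
*/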
6972
6973#endif /* OMP_40_ENABLED */
6974
6975/* Launch the worker threads into the microtask. */
6976
6977void
6978__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
6979{
6980 kmp_info_t *this_thr = __kmp_threads[gtid];
6981
6982#ifdef KMP_DEBUG
6983 int f;
6984#endif /* KMP_DEBUG */
6985
6986 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006987 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006988 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
6989 KMP_MB(); /* Flush all pending memory write invalidates. */
6990
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006991 team->t.t_construct = 0; /* no single directives seen yet */
6992 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006993
6994 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006995 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006996 if ( team->t.t_max_nproc > 1 ) {
6997 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00006998 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006999 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007000#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007001 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7002#endif
7003 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007004 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007005 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007006#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007007 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7008#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007009 }
7010
7011 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007012 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007013
7014#ifdef KMP_DEBUG
7015 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7016 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7017 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7018 }
7019#endif /* KMP_DEBUG */
7020
7021 /* release the worker threads so they may begin working */
7022 __kmp_fork_barrier( gtid, 0 );
7023}
7024
7025
7026void
7027__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7028{
7029 kmp_info_t *this_thr = __kmp_threads[gtid];
7030
7031 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007032 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007033 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7034 KMP_MB(); /* Flush all pending memory write invalidates. */
7035
7036 /* Join barrier after fork */
7037
7038#ifdef KMP_DEBUG
7039 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7040 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7041 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7042 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7043 __kmp_print_structure();
7044 }
7045 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7046 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7047#endif /* KMP_DEBUG */
7048
7049 __kmp_join_barrier( gtid ); /* wait for everyone */
7050
7051 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007052 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007053}
7054
7055
7056/* ------------------------------------------------------------------------ */
7057/* ------------------------------------------------------------------------ */
7058
7059#ifdef USE_LOAD_BALANCE
7060
7061//
 7062// Return the number of worker threads actively spinning in the hot team if we
 7063// are at the outermost level of parallelism. Otherwise, return 0.
7064//
7065static int
7066__kmp_active_hot_team_nproc( kmp_root_t *root )
7067{
7068 int i;
7069 int retval;
7070 kmp_team_t *hot_team;
7071
7072 if ( root->r.r_active ) {
7073 return 0;
7074 }
7075 hot_team = root->r.r_hot_team;
7076 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7077 return hot_team->t.t_nproc - 1; // Don't count master thread
7078 }
7079
7080 //
7081 // Skip the master thread - it is accounted for elsewhere.
7082 //
7083 retval = 0;
7084 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7085 if ( hot_team->t.t_threads[i]->th.th_active ) {
7086 retval++;
7087 }
7088 }
7089 return retval;
7090}
7091
7092//
7093// Perform an automatic adjustment to the number of
7094// threads used by the next parallel region.
7095//
7096static int
7097__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7098{
7099 int retval;
7100 int pool_active;
7101 int hot_team_active;
7102 int team_curr_active;
7103 int system_active;
7104
7105 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7106 root, set_nproc ) );
7107 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007108 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007109 KMP_DEBUG_ASSERT( set_nproc > 1 );
7110
7111 if ( set_nproc == 1) {
7112 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7113 return 1;
7114 }
7115
7116 //
7117 // Threads that are active in the thread pool, active in the hot team
7118 // for this particular root (if we are at the outer par level), and
7119 // the currently executing thread (to become the master) are available
7120 // to add to the new team, but are currently contributing to the system
7121 // load, and must be accounted for.
7122 //
7123 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7124 hot_team_active = __kmp_active_hot_team_nproc( root );
7125 team_curr_active = pool_active + hot_team_active + 1;
7126
7127 //
7128 // Check the system load.
7129 //
7130 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7131 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7132 system_active, pool_active, hot_team_active ) );
7133
7134 if ( system_active < 0 ) {
7135 //
7136 // There was an error reading the necessary info from /proc,
7137 // so use the thread limit algorithm instead. Once we set
7138 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7139 // we shouldn't wind up getting back here.
7140 //
7141 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7142 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7143
7144 //
7145 // Make this call behave like the thread limit algorithm.
7146 //
7147 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7148 : root->r.r_hot_team->t.t_nproc);
7149 if ( retval > set_nproc ) {
7150 retval = set_nproc;
7151 }
7152 if ( retval < KMP_MIN_NTH ) {
7153 retval = KMP_MIN_NTH;
7154 }
7155
7156 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7157 return retval;
7158 }
7159
7160 //
7161 // There is a slight delay in the load balance algorithm in detecting
7162 // new running procs. The real system load at this instant should be
 7163 // at least as large as the number of active OpenMP threads that are available to
7164 // add to the team.
7165 //
7166 if ( system_active < team_curr_active ) {
7167 system_active = team_curr_active;
7168 }
7169 retval = __kmp_avail_proc - system_active + team_curr_active;
7170 if ( retval > set_nproc ) {
7171 retval = set_nproc;
7172 }
7173 if ( retval < KMP_MIN_NTH ) {
7174 retval = KMP_MIN_NTH;
7175 }
7176
7177 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7178 return retval;
7179} // __kmp_load_balance_nproc()
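/*
   Hedged worked example (numbers illustrative): with __kmp_avail_proc = 16, a
   measured system_active = 10, and team_curr_active = 4 (three spinning
   pool/hot-team threads plus the forking thread), the formula above gives

       retval = 16 - 10 + 4 = 10

   which is then clamped into [KMP_MIN_NTH, set_nproc] before being returned.
*/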
7180
7181#endif /* USE_LOAD_BALANCE */
7182
Jim Cownie5e8470a2013-09-27 10:38:44 +00007183/* ------------------------------------------------------------------------ */
7184/* ------------------------------------------------------------------------ */
7185
7186/* NOTE: this is called with the __kmp_init_lock held */
7187void
7188__kmp_cleanup( void )
7189{
7190 int f;
7191
7192 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7193
7194 if (TCR_4(__kmp_init_parallel)) {
7195#if KMP_HANDLE_SIGNALS
7196 __kmp_remove_signals();
7197#endif
7198 TCW_4(__kmp_init_parallel, FALSE);
7199 }
7200
7201 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007202#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007203 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007204#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007205 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007206 TCW_4(__kmp_init_middle, FALSE);
7207 }
7208
7209 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7210
7211 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007212 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007213 __kmp_init_serial = FALSE;
7214 }
7215
7216 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7217 if ( __kmp_root[ f ] != NULL ) {
7218 __kmp_free( __kmp_root[ f ] );
7219 __kmp_root[ f ] = NULL;
7220 }
7221 }
7222 __kmp_free( __kmp_threads );
7223 // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in
7224 // freeing __kmp_root.
7225 __kmp_threads = NULL;
7226 __kmp_root = NULL;
7227 __kmp_threads_capacity = 0;
7228
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007229#if KMP_USE_DYNAMIC_LOCK
7230 __kmp_cleanup_indirect_user_locks();
7231#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007232 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007233#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007234
Alp Toker98758b02014-03-02 04:12:06 +00007235 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007236 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7237 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007238 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007239
7240 #if KMP_USE_ADAPTIVE_LOCKS
7241 #if KMP_DEBUG_ADAPTIVE_LOCKS
7242 __kmp_print_speculative_stats();
7243 #endif
7244 #endif
7245 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7246 __kmp_nested_nth.nth = NULL;
7247 __kmp_nested_nth.size = 0;
7248 __kmp_nested_nth.used = 0;
7249
7250 __kmp_i18n_catclose();
7251
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007252#if KMP_STATS_ENABLED
7253 __kmp_accumulate_stats_at_exit();
7254 __kmp_stats_list.deallocate();
7255#endif
7256
Jim Cownie5e8470a2013-09-27 10:38:44 +00007257 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7258}
7259
7260/* ------------------------------------------------------------------------ */
7261/* ------------------------------------------------------------------------ */
7262
7263int
7264__kmp_ignore_mppbeg( void )
7265{
7266 char *env;
7267
7268 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7269 if (__kmp_str_match_false( env ))
7270 return FALSE;
7271 }
7272 // By default __kmpc_begin() is no-op.
7273 return TRUE;
7274}
7275
7276int
7277__kmp_ignore_mppend( void )
7278{
7279 char *env;
7280
7281 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7282 if (__kmp_str_match_false( env ))
7283 return FALSE;
7284 }
 7285 // By default __kmpc_end() is a no-op.
7286 return TRUE;
7287}
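/*
   Hedged usage example: __kmpc_begin()/__kmpc_end() are no-ops by default; a
   "false" value in the corresponding variable re-enables them, e.g.

       $ KMP_IGNORE_MPPEND=0 ./app

   makes __kmp_ignore_mppend() return FALSE, so __kmpc_end() is expected to run
   the library shutdown path instead of being ignored.
*/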
7288
7289void
7290__kmp_internal_begin( void )
7291{
7292 int gtid;
7293 kmp_root_t *root;
7294
7295 /* this is a very important step as it will register new sibling threads
7296 * and assign these new uber threads a new gtid */
7297 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007298 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007299 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7300
7301 if( root->r.r_begin ) return;
7302 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7303 if( root->r.r_begin ) {
7304 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7305 return;
7306 }
7307
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007308 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007309
7310 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7311}
7312
7313
7314/* ------------------------------------------------------------------------ */
7315/* ------------------------------------------------------------------------ */
7316
7317void
7318__kmp_user_set_library (enum library_type arg)
7319{
7320 int gtid;
7321 kmp_root_t *root;
7322 kmp_info_t *thread;
7323
7324 /* first, make sure we are initialized so we can get our gtid */
7325
7326 gtid = __kmp_entry_gtid();
7327 thread = __kmp_threads[ gtid ];
7328
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007329 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007330
7331 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7332 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7333 KMP_WARNING( SetLibraryIncorrectCall );
7334 return;
7335 }
7336
7337 switch ( arg ) {
7338 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007339 thread->th.th_set_nproc = 0;
7340 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007341 break;
7342 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007343 thread->th.th_set_nproc = 0;
7344 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007345 break;
7346 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007347 thread->th.th_set_nproc = 0;
7348 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007349 break;
7350 default:
7351 KMP_FATAL( UnknownLibraryType, arg );
7352 }
7353
7354 __kmp_aux_set_library ( arg );
7355}
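/*
   Hedged usage sketch (assumes the kmp_set_library* extension entry points
   exported by this runtime; equivalent to setting KMP_LIBRARY before launch):

       #include <omp.h>
       int main( void ) {
           kmp_set_library_throughput();  // expected to reach __kmp_user_set_library( library_throughput )
           #pragma omp parallel
           { }
           return 0;
       }
*/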
7356
7357void
7358__kmp_aux_set_stacksize( size_t arg )
7359{
7360 if (! __kmp_init_serial)
7361 __kmp_serial_initialize();
7362
7363#if KMP_OS_DARWIN
7364 if (arg & (0x1000 - 1)) {
7365 arg &= ~(0x1000 - 1);
7366 if(arg + 0x1000) /* check for overflow if we round up */
7367 arg += 0x1000;
7368 }
7369#endif
7370 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7371
7372 /* only change the default stacksize before the first parallel region */
7373 if (! TCR_4(__kmp_init_parallel)) {
7374 size_t value = arg; /* argument is in bytes */
7375
7376 if (value < __kmp_sys_min_stksize )
7377 value = __kmp_sys_min_stksize ;
7378 else if (value > KMP_MAX_STKSIZE)
7379 value = KMP_MAX_STKSIZE;
7380
7381 __kmp_stksize = value;
7382
7383 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7384 }
7385
7386 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7387}
7388
7389/* set the behaviour of the runtime library */
7390/* TODO this can cause some odd behaviour with sibling parallelism... */
7391void
7392__kmp_aux_set_library (enum library_type arg)
7393{
7394 __kmp_library = arg;
7395
7396 switch ( __kmp_library ) {
7397 case library_serial :
7398 {
7399 KMP_INFORM( LibraryIsSerial );
7400 (void) __kmp_change_library( TRUE );
7401 }
7402 break;
7403 case library_turnaround :
7404 (void) __kmp_change_library( TRUE );
7405 break;
7406 case library_throughput :
7407 (void) __kmp_change_library( FALSE );
7408 break;
7409 default:
7410 KMP_FATAL( UnknownLibraryType, arg );
7411 }
7412}
7413
7414/* ------------------------------------------------------------------------ */
7415/* ------------------------------------------------------------------------ */
7416
7417void
7418__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7419{
7420 int blocktime = arg; /* argument is in milliseconds */
7421 int bt_intervals;
7422 int bt_set;
7423
7424 __kmp_save_internal_controls( thread );
7425
7426 /* Normalize and set blocktime for the teams */
7427 if (blocktime < KMP_MIN_BLOCKTIME)
7428 blocktime = KMP_MIN_BLOCKTIME;
7429 else if (blocktime > KMP_MAX_BLOCKTIME)
7430 blocktime = KMP_MAX_BLOCKTIME;
7431
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007432 set__blocktime_team( thread->th.th_team, tid, blocktime );
7433 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007434
7435 /* Calculate and set blocktime intervals for the teams */
7436 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7437
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007438 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7439 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007440
7441 /* Set whether blocktime has been set to "TRUE" */
7442 bt_set = TRUE;
7443
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007444 set__bt_set_team( thread->th.th_team, tid, bt_set );
7445 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007446 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7447 __kmp_gtid_from_tid(tid, thread->th.th_team),
7448 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
7449}
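/*
   Hedged usage sketch: kmp_set_blocktime() is assumed to call
   __kmp_aux_set_blocktime() for the calling thread, adjusting how long workers
   busy-wait before sleeping, e.g.

       kmp_set_blocktime( 0 );    // yield/sleep immediately after a parallel region
       kmp_set_blocktime( 200 );  // busy-wait for 200 ms (the built-in default)

   The same value can be set process-wide with KMP_BLOCKTIME.
*/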
7450
7451void
7452__kmp_aux_set_defaults(
7453 char const * str,
7454 int len
7455) {
7456 if ( ! __kmp_init_serial ) {
7457 __kmp_serial_initialize();
7458 };
7459 __kmp_env_initialize( str );
7460
7461 if (__kmp_settings
7462#if OMP_40_ENABLED
7463 || __kmp_display_env || __kmp_display_env_verbose
7464#endif // OMP_40_ENABLED
7465 ) {
7466 __kmp_env_print();
7467 }
7468} // __kmp_aux_set_defaults
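/*
   Hedged usage sketch: kmp_set_defaults() is assumed to forward its argument to
   __kmp_aux_set_defaults(), letting a program inject a KMP_/OMP_ style setting
   before the first parallel region, e.g.

       kmp_set_defaults( "KMP_BLOCKTIME=0" );
*/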
7469
7470/* ------------------------------------------------------------------------ */
7471
7472/*
7473 * internal fast reduction routines
7474 */
7475
Jim Cownie5e8470a2013-09-27 10:38:44 +00007476PACKED_REDUCTION_METHOD_T
7477__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7478 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7479 kmp_critical_name *lck )
7480{
7481
7482 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7483 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7484 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
 7485 // Finally, it's up to the OpenMP RTL to make a decision on which method to select among those generated by PAROPT.
7486
7487 PACKED_REDUCTION_METHOD_T retval;
7488
7489 int team_size;
7490
7491 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7492 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7493
7494 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7495 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7496
7497 retval = critical_reduce_block;
7498
 7499 team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic dereference ) is slower
7500
7501 if( team_size == 1 ) {
7502
7503 retval = empty_reduce_block;
7504
7505 } else {
7506
7507 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7508 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7509
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007510 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007511
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007512 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007513
7514 int teamsize_cutoff = 4;
7515
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007516#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7517 if( __kmp_mic_type != non_mic ) {
7518 teamsize_cutoff = 8;
7519 }
7520#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007521 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007522 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007523 if ( atomic_available ) {
7524 retval = atomic_reduce_block;
7525 }
7526 } else {
7527 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7528 }
7529 } else if ( atomic_available ) {
7530 retval = atomic_reduce_block;
7531 }
7532 #else
7533 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007534 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007535
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007536 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH
Jim Cownie5e8470a2013-09-27 10:38:44 +00007537
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007538 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007539
Jim Cownie5e8470a2013-09-27 10:38:44 +00007540 // basic tuning
7541
7542 if( atomic_available ) {
7543 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7544 retval = atomic_reduce_block;
7545 }
7546 } // otherwise: use critical section
7547
7548 #elif KMP_OS_DARWIN
7549
Jim Cownie5e8470a2013-09-27 10:38:44 +00007550 if( atomic_available && ( num_vars <= 3 ) ) {
7551 retval = atomic_reduce_block;
7552 } else if( tree_available ) {
7553 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7554 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7555 }
7556 } // otherwise: use critical section
7557
7558 #else
7559 #error "Unknown or unsupported OS"
7560 #endif
7561
7562 #else
7563 #error "Unknown or unsupported architecture"
7564 #endif
7565
7566 }
7567
Jim Cownie5e8470a2013-09-27 10:38:44 +00007568 // KMP_FORCE_REDUCTION
7569
Andrey Churbanovec23a952015-08-17 10:12:12 +00007570 // If the team is serialized (team_size == 1), ignore the forced reduction
7571 // method and stay with the unsynchronized method (empty_reduce_block)
7572 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007573
7574 PACKED_REDUCTION_METHOD_T forced_retval;
7575
7576 int atomic_available, tree_available;
7577
7578 switch( ( forced_retval = __kmp_force_reduction_method ) )
7579 {
7580 case critical_reduce_block:
7581 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007582 break;
7583
7584 case atomic_reduce_block:
7585 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7586 KMP_ASSERT( atomic_available ); // atomic_available should be != 0
7587 break;
7588
7589 case tree_reduce_block:
7590 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7591 KMP_ASSERT( tree_available ); // tree_available should be != 0
7592 #if KMP_FAST_REDUCTION_BARRIER
7593 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7594 #endif
7595 break;
7596
7597 default:
7598 KMP_ASSERT( 0 ); // "unsupported method specified"
7599 }
7600
7601 retval = forced_retval;
7602 }
7603
7604 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7605
7606 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7607 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7608
7609 return ( retval );
7610}
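/*
   Hedged configuration example: the forced-method branch above is assumed to be
   driven by the KMP_FORCE_REDUCTION environment variable, e.g.

       $ KMP_FORCE_REDUCTION=atomic ./app    # critical and tree are the other choices

   Per the comment above, a serialized team (team_size == 1) still uses
   empty_reduce_block regardless of any forced setting.
*/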
7611
7612// this function is for testing set/get/determine reduce method
7613kmp_int32
7614__kmp_get_reduce_method( void ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007615 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007616}
7617
7618/* ------------------------------------------------------------------------ */