/*
 * kmp_runtime.c -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif


#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread. */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */
99
100int
101__kmp_get_global_thread_id( )
102{
103 int i;
104 kmp_info_t **other_threads;
105 size_t stack_data;
106 char *stack_addr;
107 size_t stack_size;
108 char *stack_base;
109
110 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
111 __kmp_nth, __kmp_all_nth ));
112
113 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
114 parallel region, made it return KMP_GTID_DNE to force serial_initialize by
115 caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
116 __kmp_init_gtid for this to work. */
117
118 if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;
119
120#ifdef KMP_TDATA_GTID
121 if ( TCR_4(__kmp_gtid_mode) >= 3) {
122 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
123 return __kmp_gtid;
124 }
125#endif
126 if ( TCR_4(__kmp_gtid_mode) >= 2) {
127 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
128 return __kmp_gtid_get_specific();
129 }
130 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
131
132 stack_addr = (char*) & stack_data;
133 other_threads = __kmp_threads;
134
    /*
    ATT: The code below is a source of potential bugs due to unsynchronized access to
    the __kmp_threads array. For example:
    1. Current thread loads other_threads[i] into thr and checks that it is non-NULL.
    2. Current thread is suspended by the OS.
    3. Another thread unregisters and finishes (debug versions of free() may fill memory
       with something like 0xEF).
    4. Current thread is resumed.
    5. Current thread reads junk from *thr.
    TODO: Fix it.
    --ln
    */
147
148 for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
149
150 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
151 if( !thr ) continue;
152
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000153 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
154 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000155
156 /* stack grows down -- search through all of the active threads */
157
158 if( stack_addr <= stack_base ) {
159 size_t stack_diff = stack_base - stack_addr;
160
161 if( stack_diff <= stack_size ) {
162 /* The only way we can be closer than the allocated */
163 /* stack size is if we are running on this thread. */
164 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
165 return i;
166 }
167 }
168 }
169
170 /* get specific to try and determine our gtid */
171 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
172 "thread, using TLS\n" ));
173 i = __kmp_gtid_get_specific();
174
175 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
176
    /* if we haven't been assigned a gtid, then return the error code */
178 if( i<0 ) return i;
179
180 /* dynamically updated stack window for uber threads to avoid get_specific call */
181 if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
182 KMP_FATAL( StackOverflow, i );
183 }
184
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000185 stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000186 if( stack_addr > stack_base ) {
187 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
188 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
189 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
190 } else {
191 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
192 }
193
194 /* Reprint stack bounds for ubermaster since they have been refined */
195 if ( __kmp_storage_map ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000196 char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
197 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000198 __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000199 other_threads[i]->th.th_info.ds.ds_stacksize,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000200 "th_%d stack (refinement)", i );
201 }
202 return i;
203}
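/*
 * Illustrative sketch (not from the original sources): the search loop above
 * decides whether the address of a local variable lies within thread i's stack,
 * relying on the stack growing down from ds_stackbase.  The containment test is
 * equivalent to the hypothetical helper below, shown only to clarify the logic:
 *
 *     static int __kmp_addr_within_stack( char *addr, char *base, size_t size )
 *     {
 *         // the stack occupies [ base - size, base ], growing down from base
 *         return ( addr <= base ) && ( (size_t)( base - addr ) <= size );
 *     }
 */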
204
205int
206__kmp_get_global_thread_id_reg( )
207{
208 int gtid;
209
210 if ( !__kmp_init_serial ) {
211 gtid = KMP_GTID_DNE;
212 } else
213#ifdef KMP_TDATA_GTID
214 if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
215 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
216 gtid = __kmp_gtid;
217 } else
218#endif
219 if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
220 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
221 gtid = __kmp_gtid_get_specific();
222 } else {
223 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
224 gtid = __kmp_get_global_thread_id();
225 }
226
227 /* we must be a new uber master sibling thread */
228 if( gtid == KMP_GTID_DNE ) {
229 KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
230 "Registering a new gtid.\n" ));
231 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
232 if( !__kmp_init_serial ) {
233 __kmp_do_serial_initialize();
234 gtid = __kmp_gtid_get_specific();
235 } else {
236 gtid = __kmp_register_root(FALSE);
237 }
238 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
239 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
240 }
241
242 KMP_DEBUG_ASSERT( gtid >=0 );
243
244 return gtid;
245}
246
247/* caller must hold forkjoin_lock */
248void
249__kmp_check_stack_overlap( kmp_info_t *th )
250{
251 int f;
252 char *stack_beg = NULL;
253 char *stack_end = NULL;
254 int gtid;
255
256 KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
257 if ( __kmp_storage_map ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000258 stack_end = (char *) th->th.th_info.ds.ds_stackbase;
259 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000260
261 gtid = __kmp_gtid_from_thread( th );
262
263 if (gtid == KMP_GTID_MONITOR) {
264 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
265 "th_%s stack (%s)", "mon",
266 ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
267 } else {
268 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
269 "th_%d stack (%s)", gtid,
270 ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
271 }
272 }
273
274 /* No point in checking ubermaster threads since they use refinement and cannot overlap */
Andrey Churbanovbebb5402015-03-03 16:19:57 +0000275 gtid = __kmp_gtid_from_thread( th );
276 if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
Jim Cownie5e8470a2013-09-27 10:38:44 +0000277 {
278 KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
279 if ( stack_beg == NULL ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000280 stack_end = (char *) th->th.th_info.ds.ds_stackbase;
281 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000282 }
283
284 for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
285 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
286
287 if( f_th && f_th != th ) {
288 char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
289 char *other_stack_beg = other_stack_end -
290 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
291 if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
292 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
293
294 /* Print the other stack values before the abort */
295 if ( __kmp_storage_map )
296 __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
297 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
298 "th_%d stack (overlapped)",
299 __kmp_gtid_from_thread( f_th ) );
300
301 __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
302 }
303 }
304 }
305 }
306 KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
307}
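/*
 * Note on the overlap test above (informal restatement): a collision is reported
 * when either boundary of the current thread's stack falls strictly inside another
 * thread's stack.  As a stand-alone predicate (hypothetical helper, for clarity only):
 *
 *     static int __kmp_stacks_overlap( char *beg, char *end,
 *                                      char *other_beg, char *other_end )
 *     {
 *         return ( beg > other_beg && beg < other_end ) ||
 *                ( end > other_beg && end < other_end );
 *     }
 */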
308
309
310/* ------------------------------------------------------------------------ */
311
Jim Cownie5e8470a2013-09-27 10:38:44 +0000312/* ------------------------------------------------------------------------ */
313
314void
315__kmp_infinite_loop( void )
316{
317 static int done = FALSE;
318
319 while (! done) {
320 KMP_YIELD( 1 );
321 }
322}
323
324#define MAX_MESSAGE 512
325
326void
327__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
328 char buffer[MAX_MESSAGE];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000329 va_list ap;
330
331 va_start( ap, format);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000332 KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000333 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
334 __kmp_vprintf( kmp_err, buffer, ap );
335#if KMP_PRINT_DATA_PLACEMENT
Jonathan Peyton91b78702015-06-08 19:39:07 +0000336 int node;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000337 if(gtid >= 0) {
338 if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
339 if( __kmp_storage_map_verbose ) {
340 node = __kmp_get_host_node(p1);
341 if(node < 0) /* doesn't work, so don't try this next time */
342 __kmp_storage_map_verbose = FALSE;
343 else {
344 char *last;
345 int lastNode;
346 int localProc = __kmp_get_cpu_from_gtid(gtid);
347
348 p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
349 p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
350 if(localProc >= 0)
351 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
352 else
353 __kmp_printf_no_lock(" GTID %d\n", gtid);
354# if KMP_USE_PRCTL
355/* The more elaborate format is disabled for now because of the prctl hanging bug. */
356 do {
357 last = p1;
358 lastNode = node;
359 /* This loop collates adjacent pages with the same host node. */
360 do {
361 (char*)p1 += PAGE_SIZE;
362 } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
363 __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
364 (char*)p1 - 1, lastNode);
365 } while(p1 <= p2);
366# else
367 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
368 (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
369 if(p1 < p2) {
370 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
371 (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
372 }
373# endif
374 }
375 }
376 } else
377 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
378 }
379#endif /* KMP_PRINT_DATA_PLACEMENT */
380 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
381}
382
383void
384__kmp_warn( char const * format, ... )
385{
386 char buffer[MAX_MESSAGE];
387 va_list ap;
388
389 if ( __kmp_generate_warnings == kmp_warnings_off ) {
390 return;
391 }
392
393 va_start( ap, format );
394
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000395 KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000396 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
397 __kmp_vprintf( kmp_err, buffer, ap );
398 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
399
400 va_end( ap );
401}
402
403void
404__kmp_abort_process()
405{
406
407 // Later threads may stall here, but that's ok because abort() will kill them.
408 __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
409
410 if ( __kmp_debug_buf ) {
411 __kmp_dump_debug_buffer();
412 }; // if
413
414 if ( KMP_OS_WINDOWS ) {
415 // Let other threads know of abnormal termination and prevent deadlock
416 // if abort happened during library initialization or shutdown
417 __kmp_global.g.g_abort = SIGABRT;
418
        /*
            On Windows* OS, abort() by default raises a pop-up error box, which stalls nightly testing.
            Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
            works well, but this function is not available in VS7 (this is not a problem for the DLL,
            but it is a problem for the static OpenMP RTL). SetErrorMode (and so, the timelimit
            utility) does not help, at least in some versions of the MS C RTL.

            It seems the following sequence is the only way to simulate abort() and avoid the
            pop-up error box.
        */
429 raise( SIGABRT );
430 _exit( 3 ); // Just in case, if signal ignored, exit anyway.
431 } else {
432 abort();
433 }; // if
434
435 __kmp_infinite_loop();
436 __kmp_release_bootstrap_lock( & __kmp_exit_lock );
437
438} // __kmp_abort_process
439
440void
441__kmp_abort_thread( void )
442{
443 // TODO: Eliminate g_abort global variable and this function.
444 // In case of abort just call abort(), it will kill all the threads.
445 __kmp_infinite_loop();
446} // __kmp_abort_thread
447
448/* ------------------------------------------------------------------------ */
449
450/*
451 * Print out the storage map for the major kmp_info_t thread data structures
452 * that are allocated together.
453 */
454
455static void
456__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
457{
458 __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
459
460 __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
461 "th_%d.th_info", gtid );
462
463 __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
464 "th_%d.th_local", gtid );
465
466 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
467 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
468
469 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
470 &thr->th.th_bar[bs_plain_barrier+1],
471 sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
472
473 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
474 &thr->th.th_bar[bs_forkjoin_barrier+1],
475 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
476
477 #if KMP_FAST_REDUCTION_BARRIER
478 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
479 &thr->th.th_bar[bs_reduction_barrier+1],
480 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
481 #endif // KMP_FAST_REDUCTION_BARRIER
482}
483
484/*
485 * Print out the storage map for the major kmp_team_t team data structures
486 * that are allocated together.
487 */
488
489static void
490__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
491{
492 int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
493 __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
494 header, team_id );
495
496 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
497 sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
498
499
500 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
501 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
502
503 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
504 sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
505
506 #if KMP_FAST_REDUCTION_BARRIER
507 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
508 sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
509 #endif // KMP_FAST_REDUCTION_BARRIER
510
511 __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
512 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
513
514 __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
515 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
516
517 __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
518 sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
519 header, team_id );
520
Jim Cownie5e8470a2013-09-27 10:38:44 +0000521
522 __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
523 sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
524}
525
526static void __kmp_init_allocator() {}
527static void __kmp_fini_allocator() {}
Jim Cownie5e8470a2013-09-27 10:38:44 +0000528
529/* ------------------------------------------------------------------------ */
530
Jonathan Peyton99016992015-05-26 17:32:53 +0000531#ifdef KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +0000532# if KMP_OS_WINDOWS
533
534
535static void
536__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
537 // TODO: Change to __kmp_break_bootstrap_lock().
538 __kmp_init_bootstrap_lock( lck ); // make the lock released
539}
540
541static void
542__kmp_reset_locks_on_process_detach( int gtid_req ) {
543 int i;
544 int thread_count;
545
    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // The OS terminates the other threads (except the one calling ProcessExit or FreeLibrary).
    // So it might be safe to access __kmp_threads[] without taking the forkjoin_lock.
    // However, some threads may in fact still be alive here, although they are about to be terminated.
    // The threads in the array with ds_thread==0 are the most suspicious.
    // In other words, it may not actually be safe to access __kmp_threads[].

    // TODO: does it make sense to check __kmp_roots[] ?

    // Let's check that there are no other live threads registered with the OMP lib.
557 while( 1 ) {
558 thread_count = 0;
559 for( i = 0; i < __kmp_threads_capacity; ++i ) {
560 if( !__kmp_threads ) continue;
561 kmp_info_t* th = __kmp_threads[ i ];
562 if( th == NULL ) continue;
563 int gtid = th->th.th_info.ds.ds_gtid;
564 if( gtid == gtid_req ) continue;
565 if( gtid < 0 ) continue;
566 DWORD exit_val;
567 int alive = __kmp_is_thread_alive( th, &exit_val );
568 if( alive ) {
569 ++thread_count;
570 }
571 }
572 if( thread_count == 0 ) break; // success
573 }
574
575 // Assume that I'm alone.
576
577 // Now it might be probably safe to check and reset locks.
578 // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
579 __kmp_reset_lock( &__kmp_forkjoin_lock );
580 #ifdef KMP_DEBUG
581 __kmp_reset_lock( &__kmp_stdio_lock );
582 #endif // KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +0000583}
584
585BOOL WINAPI
586DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
587 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
588
589 switch( fdwReason ) {
590
591 case DLL_PROCESS_ATTACH:
592 KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));
593
594 return TRUE;
595
596 case DLL_PROCESS_DETACH:
597 KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
598 __kmp_gtid_get_specific() ));
599
600 if( lpReserved != NULL )
601 {
602 // lpReserved is used for telling the difference:
603 // lpReserved == NULL when FreeLibrary() was called,
604 // lpReserved != NULL when the process terminates.
605 // When FreeLibrary() is called, worker threads remain alive.
606 // So they will release the forkjoin lock by themselves.
607 // When the process terminates, worker threads disappear triggering
608 // the problem of unreleased forkjoin lock as described below.
609
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +0000610 // A worker thread can take the forkjoin lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000611 // The problem comes up if that worker thread becomes dead
612 // before it releases the forkjoin lock.
613 // The forkjoin lock remains taken, while the thread
614 // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
615 // will try to take the forkjoin lock and will always fail,
616 // so that the application will never finish [normally].
617 // This scenario is possible if __kmpc_end() has not been executed.
618 // It looks like it's not a corner case, but common cases:
619 // - the main function was compiled by an alternative compiler;
620 // - the main function was compiled by icl but without /Qopenmp (application with plugins);
621 // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
622 // - alive foreign thread prevented __kmpc_end from doing cleanup.
623
624 // This is a hack to work around the problem.
625 // TODO: !!! to figure out something better.
626 __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
627 }
628
629 __kmp_internal_end_library( __kmp_gtid_get_specific() );
630
631 return TRUE;
632
633 case DLL_THREAD_ATTACH:
634 KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));
635
636 /* if we wanted to register new siblings all the time here call
637 * __kmp_get_gtid(); */
638 return TRUE;
639
640 case DLL_THREAD_DETACH:
641 KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
642 __kmp_gtid_get_specific() ));
643
644 __kmp_internal_end_thread( __kmp_gtid_get_specific() );
645 return TRUE;
646 }
647
648 return TRUE;
649}
650
651# endif /* KMP_OS_WINDOWS */
Jonathan Peyton99016992015-05-26 17:32:53 +0000652#endif /* KMP_DYNAMIC_LIB */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000653
654
655/* ------------------------------------------------------------------------ */
656
657/* Change the library type to "status" and return the old type */
658/* called from within initialization routines where __kmp_initz_lock is held */
659int
660__kmp_change_library( int status )
661{
662 int old_status;
663
664 old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count)
665
666 if (status) {
667 __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
668 }
669 else {
670 __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
671 }
672
673 return old_status; // return previous setting of whether KMP_LIBRARY=throughput
674}
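/*
 * Usage sketch (illustrative only, assuming the caller restores via the returned
 * value): only the low bit of __kmp_yield_init carries the library mode, so a
 * temporary switch looks like:
 *
 *     int old_mode = __kmp_change_library( 1 );   // odd init count: turnaround
 *     ...                                         // code that prefers active waiting
 *     __kmp_change_library( old_mode );           // restore the previous mode
 */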
675
676/* ------------------------------------------------------------------------ */
677/* ------------------------------------------------------------------------ */
678
679/* __kmp_parallel_deo --
680 * Wait until it's our turn.
681 */
682void
683__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
684{
685 int gtid = *gtid_ref;
686#ifdef BUILD_PARALLEL_ORDERED
687 kmp_team_t *team = __kmp_team_from_gtid( gtid );
688#endif /* BUILD_PARALLEL_ORDERED */
689
690 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000691 if( __kmp_threads[gtid]->th.th_root->r.r_active )
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000692#if KMP_USE_DYNAMIC_LOCK
693 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
694#else
Jim Cownie5e8470a2013-09-27 10:38:44 +0000695 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000696#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000697 }
698#ifdef BUILD_PARALLEL_ORDERED
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000699 if( !team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000700 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000701 KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000702 KMP_MB();
703 }
704#endif /* BUILD_PARALLEL_ORDERED */
705}
706
707/* __kmp_parallel_dxo --
708 * Signal the next task.
709 */
710
711void
712__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
713{
714 int gtid = *gtid_ref;
715#ifdef BUILD_PARALLEL_ORDERED
716 int tid = __kmp_tid_from_gtid( gtid );
717 kmp_team_t *team = __kmp_team_from_gtid( gtid );
718#endif /* BUILD_PARALLEL_ORDERED */
719
720 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000721 if( __kmp_threads[gtid]->th.th_root->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000722 __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
723 }
724#ifdef BUILD_PARALLEL_ORDERED
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000725 if ( ! team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000726 KMP_MB(); /* Flush all pending memory write invalidates. */
727
        /* use the tid of the next thread in this team */
        /* TODO: replace with a general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000731
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000732#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000733 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000734 ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
735 /* accept blame for "ordered" waiting */
736 kmp_info_t *this_thread = __kmp_threads[gtid];
737 ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
738 this_thread->th.ompt_thread_info.wait_id);
739 }
740#endif
741
Jim Cownie5e8470a2013-09-27 10:38:44 +0000742 KMP_MB(); /* Flush all pending memory write invalidates. */
743 }
744#endif /* BUILD_PARALLEL_ORDERED */
745}
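/*
 * Informal restatement of the protocol above: __kmp_parallel_deo / __kmp_parallel_dxo
 * implement token passing for the ORDERED construct.  t_ordered.dt.t_value holds the
 * tid whose turn it is; each thread waits for its own tid, runs the ordered region,
 * then hands the token to the next thread:
 *
 *     wait until ( team->t.t_ordered.dt.t_value == tid );       // __kmp_parallel_deo
 *     ... ordered region ...
 *     team->t.t_ordered.dt.t_value = ( tid + 1 ) % t_nproc;     // __kmp_parallel_dxo
 *
 * This only paraphrases the code above; it adds no new runtime behavior.
 */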
746
747/* ------------------------------------------------------------------------ */
748/* ------------------------------------------------------------------------ */
749
750/* ------------------------------------------------------------------------ */
751/* ------------------------------------------------------------------------ */
752
753/* The BARRIER for a SINGLE process section is always explicit */
754
755int
756__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
757{
758 int status;
759 kmp_info_t *th;
760 kmp_team_t *team;
761
762 if( ! TCR_4(__kmp_init_parallel) )
763 __kmp_parallel_initialize();
764
765 th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000766 team = th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000767 status = 0;
768
769 th->th.th_ident = id_ref;
770
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000771 if ( team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000772 status = 1;
773 } else {
774 kmp_int32 old_this = th->th.th_local.this_construct;
775
776 ++th->th.th_local.this_construct;
777 /* try to set team count to thread count--success means thread got the
778 single block
779 */
780 /* TODO: Should this be acquire or release? */
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000781 status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000782 th->th.th_local.this_construct);
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000783#if USE_ITT_BUILD
784 if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
785#if OMP_40_ENABLED
786 th->th.th_teams_microtask == NULL &&
787#endif
788 team->t.t_active_level == 1 )
789 { // Only report metadata by master of active team at level 1
790 __kmp_itt_metadata_single( id_ref );
791 }
792#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000793 }
794
795 if( __kmp_env_consistency_check ) {
796 if (status && push_ws) {
797 __kmp_push_workshare( gtid, ct_psingle, id_ref );
798 } else {
799 __kmp_check_workshare( gtid, ct_psingle, id_ref );
800 }
801 }
802#if USE_ITT_BUILD
803 if ( status ) {
804 __kmp_itt_single_start( gtid );
805 }
806#endif /* USE_ITT_BUILD */
807 return status;
808}
809
810void
811__kmp_exit_single( int gtid )
812{
813#if USE_ITT_BUILD
814 __kmp_itt_single_end( gtid );
815#endif /* USE_ITT_BUILD */
816 if( __kmp_env_consistency_check )
817 __kmp_pop_workshare( gtid, ct_psingle, NULL );
818}
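/*
 * Usage sketch (illustrative; in practice these are reached through the
 * compiler-generated entry points such as __kmpc_single / __kmpc_end_single):
 *
 *     if ( __kmp_enter_single( gtid, loc, TRUE ) ) {
 *         // only the thread that won the compare-and-store on t_construct gets here
 *         ...
 *         __kmp_exit_single( gtid );
 *     }
 */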
819
820
/*
 * Determine if we can go parallel or if we must serialize the parallel region,
 * and how many threads we can use.
 * set_nthreads is the number of threads requested for the team.
 * Returns 1 if we should serialize or use only one thread,
 * otherwise the number of threads to use.
 * The forkjoin lock is held by the caller.
 */
829static int
830__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
831 int master_tid, int set_nthreads
832#if OMP_40_ENABLED
833 , int enter_teams
834#endif /* OMP_40_ENABLED */
835)
836{
837 int capacity;
838 int new_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000839 KMP_DEBUG_ASSERT( __kmp_init_serial );
840 KMP_DEBUG_ASSERT( root && parent_team );
841
842 //
Jim Cownie5e8470a2013-09-27 10:38:44 +0000843 // If dyn-var is set, dynamically adjust the number of desired threads,
844 // according to the method specified by dynamic_mode.
845 //
846 new_nthreads = set_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000847 if ( ! get__dynamic_2( parent_team, master_tid ) ) {
848 ;
849 }
850#ifdef USE_LOAD_BALANCE
851 else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
852 new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
853 if ( new_nthreads == 1 ) {
854 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
855 master_tid ));
856 return 1;
857 }
858 if ( new_nthreads < set_nthreads ) {
859 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
860 master_tid, new_nthreads ));
861 }
862 }
863#endif /* USE_LOAD_BALANCE */
864 else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
865 new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
866 : root->r.r_hot_team->t.t_nproc);
867 if ( new_nthreads <= 1 ) {
868 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
869 master_tid ));
870 return 1;
871 }
872 if ( new_nthreads < set_nthreads ) {
873 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
874 master_tid, new_nthreads ));
875 }
876 else {
877 new_nthreads = set_nthreads;
878 }
879 }
880 else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
881 if ( set_nthreads > 2 ) {
882 new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
883 new_nthreads = ( new_nthreads % set_nthreads ) + 1;
884 if ( new_nthreads == 1 ) {
885 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
886 master_tid ));
887 return 1;
888 }
889 if ( new_nthreads < set_nthreads ) {
890 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
891 master_tid, new_nthreads ));
892 }
893 }
894 }
895 else {
896 KMP_ASSERT( 0 );
897 }
898
899 //
900 // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
901 //
902 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
903 root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
904 int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
905 root->r.r_hot_team->t.t_nproc );
906 if ( tl_nthreads <= 0 ) {
907 tl_nthreads = 1;
908 }
909
910 //
911 // If dyn-var is false, emit a 1-time warning.
912 //
913 if ( ! get__dynamic_2( parent_team, master_tid )
914 && ( ! __kmp_reserve_warn ) ) {
915 __kmp_reserve_warn = 1;
916 __kmp_msg(
917 kmp_ms_warning,
918 KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
919 KMP_HNT( Unset_ALL_THREADS ),
920 __kmp_msg_null
921 );
922 }
923 if ( tl_nthreads == 1 ) {
924 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
925 master_tid ));
926 return 1;
927 }
928 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
929 master_tid, tl_nthreads ));
930 new_nthreads = tl_nthreads;
931 }
932
Jim Cownie5e8470a2013-09-27 10:38:44 +0000933 //
934 // Check if the threads array is large enough, or needs expanding.
935 //
936 // See comment in __kmp_register_root() about the adjustment if
937 // __kmp_threads[0] == NULL.
938 //
939 capacity = __kmp_threads_capacity;
940 if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
941 --capacity;
942 }
943 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
944 root->r.r_hot_team->t.t_nproc ) > capacity ) {
945 //
946 // Expand the threads array.
947 //
948 int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
949 root->r.r_hot_team->t.t_nproc ) - capacity;
950 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
951 if ( slotsAdded < slotsRequired ) {
952 //
953 // The threads array was not expanded enough.
954 //
955 new_nthreads -= ( slotsRequired - slotsAdded );
956 KMP_ASSERT( new_nthreads >= 1 );
957
958 //
959 // If dyn-var is false, emit a 1-time warning.
960 //
961 if ( ! get__dynamic_2( parent_team, master_tid )
962 && ( ! __kmp_reserve_warn ) ) {
963 __kmp_reserve_warn = 1;
964 if ( __kmp_tp_cached ) {
965 __kmp_msg(
966 kmp_ms_warning,
967 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
968 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
969 KMP_HNT( PossibleSystemLimitOnThreads ),
970 __kmp_msg_null
971 );
972 }
973 else {
974 __kmp_msg(
975 kmp_ms_warning,
976 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
977 KMP_HNT( SystemLimitOnThreads ),
978 __kmp_msg_null
979 );
980 }
981 }
982 }
983 }
984
985 if ( new_nthreads == 1 ) {
986 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
987 __kmp_get_gtid(), set_nthreads ) );
988 return 1;
989 }
990
991 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
992 __kmp_get_gtid(), new_nthreads, set_nthreads ));
993 return new_nthreads;
994}
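/*
 * Worked example (illustrative): under dynamic_random with set_nthreads == 8,
 * new_nthreads = ( __kmp_get_random(...) % 8 ) + 1, i.e. a value in [1, 8].
 * A result of 1 serializes the region; larger values are still subject to the
 * KMP_ALL_THREADS / thread-limit and threads-array-capacity checks above.
 */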
995
996/* ------------------------------------------------------------------------ */
997/* ------------------------------------------------------------------------ */
998
/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within the forkjoin critical section */
1002
1003static void
1004__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
1005 kmp_info_t *master_th, int master_gtid )
1006{
1007 int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001008 int use_hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001009
1010 KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
1011 KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
1012 KMP_MB();
1013
1014 /* first, let's setup the master thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001015 master_th->th.th_info.ds.ds_tid = 0;
1016 master_th->th.th_team = team;
1017 master_th->th.th_team_nproc = team->t.t_nproc;
1018 master_th->th.th_team_master = master_th;
1019 master_th->th.th_team_serialized = FALSE;
1020 master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001021
1022 /* make sure we are not the optimized hot team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001023#if KMP_NESTED_HOT_TEAMS
1024 use_hot_team = 0;
1025 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
1026 if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
1027 int level = team->t.t_active_level - 1; // index in array of hot teams
1028 if( master_th->th.th_teams_microtask ) { // are we inside the teams?
1029 if( master_th->th.th_teams_size.nteams > 1 ) {
1030 ++level; // level was not increased in teams construct for team_of_masters
1031 }
1032 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1033 master_th->th.th_teams_level == team->t.t_level ) {
1034 ++level; // level was not increased in teams construct for team_of_workers before the parallel
1035 } // team->t.t_level will be increased inside parallel
1036 }
1037 if( level < __kmp_hot_teams_max_level ) {
1038 if( hot_teams[level].hot_team ) {
1039 // hot team has already been allocated for given level
1040 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
1041 use_hot_team = 1; // the team is ready to use
1042 } else {
1043 use_hot_team = 0; // AC: threads are not allocated yet
1044 hot_teams[level].hot_team = team; // remember new hot team
1045 hot_teams[level].hot_team_nth = team->t.t_nproc;
1046 }
1047 } else {
1048 use_hot_team = 0;
1049 }
1050 }
1051#else
1052 use_hot_team = team == root->r.r_hot_team;
1053#endif
1054 if ( !use_hot_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001055
1056 /* install the master thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001057 team->t.t_threads[ 0 ] = master_th;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001058 __kmp_initialize_info( master_th, team, 0, master_gtid );
1059
1060 /* now, install the worker threads */
1061 for ( i=1 ; i < team->t.t_nproc ; i++ ) {
1062
1063 /* fork or reallocate a new thread and install it in team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001064 kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
1065 team->t.t_threads[ i ] = thr;
1066 KMP_DEBUG_ASSERT( thr );
1067 KMP_DEBUG_ASSERT( thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001068 /* align team and thread arrived states */
Jonathan Peytond26e2132015-09-10 18:44:30 +00001069 KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001070 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
1071 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
1072 team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
1073 team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001074#if OMP_40_ENABLED
1075 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1076 thr->th.th_teams_level = master_th->th.th_teams_level;
1077 thr->th.th_teams_size = master_th->th.th_teams_size;
1078#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001079 { // Initialize threads' barrier data.
1080 int b;
1081 kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
1082 for ( b = 0; b < bs_last_barrier; ++ b ) {
1083 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001084 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001085#if USE_DEBUGGER
1086 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
1087#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001088 }; // for b
1089 }
1090 }
1091
Alp Toker98758b02014-03-02 04:12:06 +00001092#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001093 __kmp_partition_places( team );
1094#endif
1095
1096 }
1097
1098 KMP_MB();
1099}
1100
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001101#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1102//
1103// Propagate any changes to the floating point control registers out to the team
1104// We try to avoid unnecessary writes to the relevant cache line in the team structure,
1105// so we don't make changes unless they are needed.
1106//
1107inline static void
1108propagateFPControl(kmp_team_t * team)
1109{
1110 if ( __kmp_inherit_fp_control ) {
1111 kmp_int16 x87_fpu_control_word;
1112 kmp_uint32 mxcsr;
1113
1114 // Get master values of FPU control flags (both X87 and vector)
1115 __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
1116 __kmp_store_mxcsr( &mxcsr );
1117 mxcsr &= KMP_X86_MXCSR_MASK;
1118
1119 // There is no point looking at t_fp_control_saved here.
1120 // If it is TRUE, we still have to update the values if they are different from those we now have.
1121 // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
1122 // that the values in the team are the same as those we have.
1123 // So, this code achieves what we need whether or not t_fp_control_saved is true.
1124 // By checking whether the value needs updating we avoid unnecessary writes that would put the
1125 // cache-line into a written state, causing all threads in the team to have to read it again.
1126 if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
1127 team->t.t_x87_fpu_control_word = x87_fpu_control_word;
1128 }
1129 if ( team->t.t_mxcsr != mxcsr ) {
1130 team->t.t_mxcsr = mxcsr;
1131 }
1132 // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
1133 // So we must ensure it is correct.
1134 if (!team->t.t_fp_control_saved) {
1135 team->t.t_fp_control_saved = TRUE;
1136 }
1137 }
1138 else {
1139 // Similarly here. Don't write to this cache-line in the team structure unless we have to.
1140 if (team->t.t_fp_control_saved)
1141 team->t.t_fp_control_saved = FALSE;
1142 }
1143}
1144
1145// Do the opposite, setting the hardware registers to the updated values from the team.
1146inline static void
1147updateHWFPControl(kmp_team_t * team)
1148{
1149 if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed in the team
        // during the parallel region that we are exiting.
        //
1154 kmp_int16 x87_fpu_control_word;
1155 kmp_uint32 mxcsr;
1156 __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
1157 __kmp_store_mxcsr( &mxcsr );
1158 mxcsr &= KMP_X86_MXCSR_MASK;
1159
1160 if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
1161 __kmp_clear_x87_fpu_status_word();
1162 __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
1163 }
1164
1165 if ( team->t.t_mxcsr != mxcsr ) {
1166 __kmp_load_mxcsr( &team->t.t_mxcsr );
1167 }
1168 }
1169}
1170#else
1171# define propagateFPControl(x) ((void)0)
1172# define updateHWFPControl(x) ((void)0)
1173#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
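/*
 * Intended pairing of the helpers above (informal sketch):
 *
 *     propagateFPControl( team );    // at fork: capture the master's x87/MXCSR state in the team
 *     ...                            // team executes the parallel region
 *     updateHWFPControl( team );     // afterwards: reload the saved state into the FP hardware
 *
 * On architectures other than x86/x86_64 both calls expand to no-ops via the
 * macros above.
 */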
1174
Jim Cownie5e8470a2013-09-27 10:38:44 +00001175static void
1176__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
1177
/*
 * Run a parallel region that has been serialized, so it runs only in a team of the single master thread.
 */
1181void
1182__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
1183{
1184 kmp_info_t *this_thr;
1185 kmp_team_t *serial_team;
1186
1187 KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );
1188
1189 /* Skip all this code for autopar serialized loops since it results in
1190 unacceptable overhead */
1191 if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
1192 return;
1193
1194 if( ! TCR_4( __kmp_init_parallel ) )
1195 __kmp_parallel_initialize();
1196
1197 this_thr = __kmp_threads[ global_tid ];
1198 serial_team = this_thr->th.th_serial_team;
1199
1200 /* utilize the serialized team held by this thread */
1201 KMP_DEBUG_ASSERT( serial_team );
1202 KMP_MB();
1203
1204 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001205 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1206 KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001207 KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
1208 global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
1209 this_thr->th.th_task_team = NULL;
1210 }
1211
1212#if OMP_40_ENABLED
1213 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1214 if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1215 proc_bind = proc_bind_false;
1216 }
1217 else if ( proc_bind == proc_bind_default ) {
1218 //
1219 // No proc_bind clause was specified, so use the current value
1220 // of proc-bind-var for this parallel region.
1221 //
1222 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1223 }
1224 //
1225 // Reset for next parallel region
1226 //
1227 this_thr->th.th_set_proc_bind = proc_bind_default;
1228#endif /* OMP_40_ENABLED */
1229
1230 if( this_thr->th.th_team != serial_team ) {
1231 // Nested level will be an index in the nested nthreads array
1232 int level = this_thr->th.th_team->t.t_level;
1233
1234 if( serial_team->t.t_serialized ) {
            /* this serial team was already used
             * TODO: increase performance by making these locks more specific */
1237 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001238
1239 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1240
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001241#if OMPT_SUPPORT
1242 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1243#endif
1244
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001245 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001246#if OMPT_SUPPORT
1247 ompt_parallel_id,
1248#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001249#if OMP_40_ENABLED
1250 proc_bind,
1251#endif
1252 & this_thr->th.th_current_task->td_icvs,
1253 0 USE_NESTED_HOT_ARG(NULL) );
1254 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1255 KMP_ASSERT( new_team );
1256
1257 /* setup new serialized team and install it */
1258 new_team->t.t_threads[0] = this_thr;
1259 new_team->t.t_parent = this_thr->th.th_team;
1260 serial_team = new_team;
1261 this_thr->th.th_serial_team = serial_team;
1262
1263 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1264 global_tid, serial_team ) );
1265
1266
1267 /* TODO the above breaks the requirement that if we run out of
1268 * resources, then we can still guarantee that serialized teams
1269 * are ok, since we may need to allocate a new one */
1270 } else {
1271 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1272 global_tid, serial_team ) );
1273 }
1274
1275 /* we have to initialize this serial team */
1276 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1277 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1278 KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
1279 serial_team->t.t_ident = loc;
1280 serial_team->t.t_serialized = 1;
1281 serial_team->t.t_nproc = 1;
1282 serial_team->t.t_parent = this_thr->th.th_team;
1283 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
1284 this_thr->th.th_team = serial_team;
1285 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1286
1287 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n",
1288 global_tid, this_thr->th.th_current_task ) );
1289 KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
1290 this_thr->th.th_current_task->td_flags.executing = 0;
1291
1292 __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
1293
1294 /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
1295 each serialized task represented by team->t.t_serialized? */
1296 copy_icvs(
1297 & this_thr->th.th_current_task->td_icvs,
1298 & this_thr->th.th_current_task->td_parent->td_icvs );
1299
1300 // Thread value exists in the nested nthreads array for the next nested level
1301 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1302 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1303 }
1304
1305#if OMP_40_ENABLED
1306 if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
1307 this_thr->th.th_current_task->td_icvs.proc_bind
1308 = __kmp_nested_proc_bind.bind_types[ level + 1 ];
1309 }
1310#endif /* OMP_40_ENABLED */
1311
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001312#if USE_DEBUGGER
1313 serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
1314#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001315 this_thr->th.th_info.ds.ds_tid = 0;
1316
1317 /* set thread cache values */
1318 this_thr->th.th_team_nproc = 1;
1319 this_thr->th.th_team_master = this_thr;
1320 this_thr->th.th_team_serialized = 1;
1321
1322 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1323 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1324
1325 propagateFPControl (serial_team);
1326
1327 /* check if we need to allocate dispatch buffers stack */
1328 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1329 if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
1330 serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
1331 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1332 }
1333 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1334
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001335#if OMPT_SUPPORT
1336 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1337 __ompt_team_assign_id(serial_team, ompt_parallel_id);
1338#endif
1339
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001340 KMP_MB();
1341
1342 } else {
1343 /* this serialized team is already being used,
1344 * that's fine, just add another nested level */
1345 KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
1346 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1347 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1348 ++ serial_team->t.t_serialized;
1349 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1350
1351 // Nested level will be an index in the nested nthreads array
1352 int level = this_thr->th.th_team->t.t_level;
1353 // Thread value exists in the nested nthreads array for the next nested level
1354 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1355 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1356 }
1357 serial_team->t.t_level++;
1358 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
1359 global_tid, serial_team, serial_team->t.t_level ) );
1360
1361 /* allocate/push dispatch buffers stack */
1362 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1363 {
1364 dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
1365 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1366 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1367 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1368 }
1369 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1370
1371 KMP_MB();
1372 }
1373
1374 if ( __kmp_env_consistency_check )
1375 __kmp_push_parallel( global_tid, NULL );
1376
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001377}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001378
Jim Cownie5e8470a2013-09-27 10:38:44 +00001379/* most of the work for a fork */
1380/* return true if we really went parallel, false if serialized */
1381int
1382__kmp_fork_call(
1383 ident_t * loc,
1384 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001385 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001386 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001387#if OMPT_SUPPORT
1388 void *unwrapped_task,
1389#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001390 microtask_t microtask,
1391 launch_t invoker,
1392/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001393#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001394 va_list * ap
1395#else
1396 va_list ap
1397#endif
1398 )
1399{
1400 void **argv;
1401 int i;
1402 int master_tid;
1403 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001404 kmp_team_t *team;
1405 kmp_team_t *parent_team;
1406 kmp_info_t *master_th;
1407 kmp_root_t *root;
1408 int nthreads;
1409 int master_active;
1410 int master_set_numthreads;
1411 int level;
1412#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001413 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001414 int teams_level;
1415#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001416#if KMP_NESTED_HOT_TEAMS
1417 kmp_hot_team_ptr_t **p_hot_teams;
1418#endif
1419 { // KMP_TIME_BLOCK
Jonathan Peyton45be4502015-08-11 21:36:41 +00001420 KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
1421 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001422
1423 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001424 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
1425 /* Some systems prefer the stack for the root thread(s) to start with */
1426 /* some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001427 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001428 /* These 2 lines below are so this does not get optimized out */
1429 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1430 __kmp_stkpadding += (short)((kmp_int64)dummy);
1431 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001432
1433 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001434 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001435 if( ! TCR_4(__kmp_init_parallel) )
1436 __kmp_parallel_initialize();
1437
1438 /* setup current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001439 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1440 parent_team = master_th->th.th_team;
1441 master_tid = master_th->th.th_info.ds.ds_tid;
1442 master_this_cons = master_th->th.th_local.this_construct;
1443 root = master_th->th.th_root;
1444 master_active = root->r.r_active;
1445 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001446
1447#if OMPT_SUPPORT
1448 ompt_parallel_id_t ompt_parallel_id;
1449 ompt_task_id_t ompt_task_id;
1450 ompt_frame_t *ompt_frame;
1451 ompt_task_id_t my_task_id;
1452 ompt_parallel_id_t my_parallel_id;
1453
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001454 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001455 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1456 ompt_task_id = __ompt_get_task_id_internal(0);
1457 ompt_frame = __ompt_get_task_frame_internal(0);
1458 }
1459#endif
1460
Jim Cownie5e8470a2013-09-27 10:38:44 +00001461 // Nested level will be an index in the nested nthreads array
1462 level = parent_team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001463#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001464 active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed
Jim Cownie5e8470a2013-09-27 10:38:44 +00001465 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1466#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001467#if KMP_NESTED_HOT_TEAMS
1468 p_hot_teams = &master_th->th.th_hot_teams;
1469 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1470 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1471 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1472 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1473 (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
1474 }
1475#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001476
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001477#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001478 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001479 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1480 int team_size = master_set_numthreads;
1481
1482 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1483 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001484 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001485 }
1486#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001487
Jim Cownie5e8470a2013-09-27 10:38:44 +00001488 master_th->th.th_ident = loc;
1489
1490#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001491 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
1493 // AC: This is the start of a parallel region nested inside a teams construct.
1494 // The team already exists (it is hot); all workers are waiting at the fork barrier.
1495 // No lock is needed to do the minimal team setup and then release the workers.
1496 parent_team->t.t_ident = loc;
1497 parent_team->t.t_argc = argc;
1498 argv = (void**)parent_team->t.t_argv;
1499 for( i=argc-1; i >= 0; --i )
1500/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001501#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001502 *argv++ = va_arg( *ap, void * );
1503#else
1504 *argv++ = va_arg( ap, void * );
1505#endif
1506 /* Increment our nested depth levels, but not increase the serialization */
1507 if ( parent_team == master_th->th.th_serial_team ) {
1508 // AC: we are in serialized parallel
1509 __kmpc_serialized_parallel(loc, gtid);
1510 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
1511 parent_team->t.t_serialized--; // AC: need this in order enquiry functions
1512 // work correctly, will restore at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001513
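        // Note: for this serialized case a lightweight OMPT task team (lw_taskteam) is
        // linked in place of a full team below, so tools still receive implicit-task
        // and parallel begin/end events for the region.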
1514#if OMPT_SUPPORT
1515 void *dummy;
1516 void **exit_runtime_p;
1517
1518 ompt_lw_taskteam_t lw_taskteam;
1519
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001520 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001521 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1522 unwrapped_task, ompt_parallel_id);
1523 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1524 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1525
1526 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1527
1528#if OMPT_TRACE
1529 /* OMPT implicit task begin */
1530 my_task_id = lw_taskteam.ompt_task_info.task_id;
1531 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001532 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001533 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1534 my_parallel_id, my_task_id);
1535 }
1536#endif
1537
1538 /* OMPT state */
1539 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1540 } else {
1541 exit_runtime_p = &dummy;
1542 }
1543#endif
1544
Jonathan Peyton45be4502015-08-11 21:36:41 +00001545 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001546 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1547 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001548 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001549#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001550 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001551#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001552 );
1553 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001554
1555#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001556 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001557#if OMPT_TRACE
1558 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1559
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001560 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001561 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1562 ompt_parallel_id, ompt_task_id);
1563 }
1564
1565 __ompt_lw_taskteam_unlink(master_th);
1566 // clear the task id only after unlinking the task
1567 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1568#endif
1569
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001570 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001571 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001572 ompt_parallel_id, ompt_task_id,
1573 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001574 }
1575 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1576 }
1577#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001578 return TRUE;
1579 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001580
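        // Active (non-serialized) parallel nested in a teams construct: reuse the hot
        // parent team. Record the microtask and invoker, bump the nesting levels,
        // optionally shrink the thread count, then fork the workers and invoke the
        // master's portion of the microtask.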
Jim Cownie5e8470a2013-09-27 10:38:44 +00001581 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001582#if OMPT_SUPPORT
1583 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1584#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001585 parent_team->t.t_invoke = invoker;
1586 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1587 parent_team->t.t_active_level ++;
1588 parent_team->t.t_level ++;
1589
1590 /* Change number of threads in the team if requested */
1591 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001592 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001593 // AC: the number of threads can only be reduced dynamically; it cannot be increased
1594 kmp_info_t **other_threads = parent_team->t.t_threads;
1595 parent_team->t.t_nproc = master_set_numthreads;
1596 for ( i = 0; i < master_set_numthreads; ++i ) {
1597 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1598 }
1599 // Keep extra threads hot in the team for possible next parallels
1600 }
1601 master_th->th.th_set_nproc = 0;
1602 }
1603
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001604#if USE_DEBUGGER
1605 if ( __kmp_debugging ) { // Let debugger override number of threads.
1606 int nth = __kmp_omp_num_threads( loc );
1607 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1608 master_set_numthreads = nth;
1609 }; // if
1610 }; // if
1611#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001612
1613 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1614 __kmp_internal_fork( loc, gtid, parent_team );
1615 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1616
1617 /* Invoke microtask for MASTER thread */
1618 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1619 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1620
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001621 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001622 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1623 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001624 if (! parent_team->t.t_invoke( gtid )) {
1625 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1626 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001627 }
1628 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1629 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1630 KMP_MB(); /* Flush all pending memory write invalidates. */
1631
1632 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1633
1634 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001635 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001636#endif /* OMP_40_ENABLED */
1637
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001638#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001640 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001641 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001642#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643
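    // Decide how many threads this parallel region gets: serialize when the
    // max-active-levels limit is already reached; otherwise honor num_threads /
    // the nproc ICV and reserve threads from the pool under the forkjoin lock.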
Jim Cownie5e8470a2013-09-27 10:38:44 +00001644 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
1645 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001646 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001647#if OMP_40_ENABLED
1648 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1649#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001650 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001651 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001652
1653 // Check whether we need to take the forkjoin lock (not needed for a serialized parallel outside of a teams construct).
1654 // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
1655 if (nthreads > 1) {
1656 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1657#if OMP_40_ENABLED
1658 && !enter_teams
1659#endif /* OMP_40_ENABLED */
1660 ) ) || ( __kmp_library == library_serial ) ) {
1661 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1662 gtid, nthreads ));
1663 nthreads = 1;
1664 }
1665 }
1666 if ( nthreads > 1 ) {
1667 /* determine how many new threads we can use */
1668 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1669
1670 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001671#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001672/* AC: If teams is executed from a parallel region (on the host), then the teams should be created,
1673 but each can have only 1 thread if nesting is disabled. If teams is called from a serial region,
1674 then the teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001675 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001676#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001677 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001678 if ( nthreads == 1 ) {
1679 // Free lock for single thread execution here;
1680 // for multi-thread execution it will be freed later
1681 // after team of threads created and initialized
1682 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1683 }
1684 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001685 }
1686 KMP_DEBUG_ASSERT( nthreads > 0 );
1687
1688 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001689 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001690
Jim Cownie5e8470a2013-09-27 10:38:44 +00001691 /* create a serialized parallel region? */
1692 if ( nthreads == 1 ) {
1693 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001694#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 void * args[ argc ];
1696#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001697 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001698#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001699
Jim Cownie5e8470a2013-09-27 10:38:44 +00001700 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1701
1702 __kmpc_serialized_parallel(loc, gtid);
1703
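        // Serialized region entered through the Intel/LLVM entry point: run the outlined
        // body (or the special teams invoker) directly on this thread, with arguments
        // taken from the va_list.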
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001704 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001705 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001706 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707#if OMP_40_ENABLED
1708 if ( !ap ) {
1709 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001710 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001712
1713#if OMPT_SUPPORT
1714 void *dummy;
1715 void **exit_runtime_p;
1716
1717 ompt_lw_taskteam_t lw_taskteam;
1718
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001719 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001720 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1721 unwrapped_task, ompt_parallel_id);
1722 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1723 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1724
1725 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1726
1727#if OMPT_TRACE
1728 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001729 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001730 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1731 ompt_parallel_id, my_task_id);
1732 }
1733#endif
1734
1735 /* OMPT state */
1736 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1737 } else {
1738 exit_runtime_p = &dummy;
1739 }
1740#endif
1741
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001742 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001743 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1744 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001745 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1746#if OMPT_SUPPORT
1747 , exit_runtime_p
1748#endif
1749 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001750 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001751
1752#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001753 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001754 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1755
1756#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001757 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001758 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1759 ompt_parallel_id, ompt_task_id);
1760 }
1761#endif
1762
1763 __ompt_lw_taskteam_unlink(master_th);
1764 // clear the task id only after unlinking the task
1765 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1766
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001767 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001768 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001769 ompt_parallel_id, ompt_task_id,
1770 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001771 }
1772 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1773 }
1774#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001775 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
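            // Host-side teams construct executed serially: reuse the serial team as the
            // team object, copy the outlined-function arguments, and run the special
            // teams invoker.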
1776 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1777 team = master_th->th.th_team;
1778 //team->t.t_pkfn = microtask;
1779 team->t.t_invoke = invoker;
1780 __kmp_alloc_argv_entries( argc, team, TRUE );
1781 team->t.t_argc = argc;
1782 argv = (void**) team->t.t_argv;
1783 if ( ap ) {
1784 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001785// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001786# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001787 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001788# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001789 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001790# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001791 } else {
1792 for( i=0; i < argc; ++i )
1793 // Get args from parent team for teams construct
1794 argv[i] = parent_team->t.t_argv[i];
1795 }
1796 // AC: revert change made in __kmpc_serialized_parallel()
1797 // because initial code in teams should have level=0
1798 team->t.t_level--;
1799 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001800 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001801 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1802 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001803 invoker(gtid);
1804 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001805 } else {
1806#endif /* OMP_40_ENABLED */
1807 argv = args;
1808 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001809// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001810#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001811 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001812#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001813 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001814#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001815 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001816
1817#if OMPT_SUPPORT
1818 void *dummy;
1819 void **exit_runtime_p;
1820
1821 ompt_lw_taskteam_t lw_taskteam;
1822
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001823 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001824 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1825 unwrapped_task, ompt_parallel_id);
1826 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1827 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1828
1829 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1830
1831#if OMPT_TRACE
1832 /* OMPT implicit task begin */
1833 my_task_id = lw_taskteam.ompt_task_info.task_id;
1834 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001835 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001836 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1837 my_parallel_id, my_task_id);
1838 }
1839#endif
1840
1841 /* OMPT state */
1842 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1843 } else {
1844 exit_runtime_p = &dummy;
1845 }
1846#endif
1847
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001848 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001849 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1850 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001851 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1852#if OMPT_SUPPORT
1853 , exit_runtime_p
1854#endif
1855 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001856 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001857
1858#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001859 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001860#if OMPT_TRACE
1861 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1862
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001863 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001864 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1865 my_parallel_id, my_task_id);
1866 }
1867#endif
1868
1869 __ompt_lw_taskteam_unlink(master_th);
1870 // clear the task id only after unlinking the task
1871 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1872
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001873 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001874 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001875 ompt_parallel_id, ompt_task_id,
1876 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001877 }
1878 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1879 }
1880#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001881#if OMP_40_ENABLED
1882 }
1883#endif /* OMP_40_ENABLED */
1884 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001885 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001886#if OMPT_SUPPORT
1887 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1888 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1889 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1890 unwrapped_task, ompt_parallel_id);
1891
1892 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1893 lwt->ompt_task_info.frame.exit_runtime_frame = 0;
1894 __ompt_lw_taskteam_link(lwt, master_th);
1895#endif
1896
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001897 // we were called from GNU native code
1898 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1899 return FALSE;
1900 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001901 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001902 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001903 }
1904
Jim Cownie5e8470a2013-09-27 10:38:44 +00001905
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001906 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907 KMP_MB();
1908 return FALSE;
1909 }
1910
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 // GEH: only modify the executing flag in the case when not serialized
1912 // serialized case is handled in kmpc_serialized_parallel
1913 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001914 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1915 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001916 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1917 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1918 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001919
1920#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001921 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922#endif /* OMP_40_ENABLED */
1923 {
1924 /* Increment our nested depth level */
1925 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1926 }
1927
Jim Cownie5e8470a2013-09-27 10:38:44 +00001928 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001929 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001930 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1931 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932 }
1933 else {
1934 nthreads_icv = 0; // don't update
1935 }
1936
1937#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001939 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001940 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001941 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1942 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943 }
1944 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001945 if (proc_bind == proc_bind_default) {
1946 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1947 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001949 /* else: The proc_bind policy was specified explicitly on parallel clause. This
1950 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001951 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001952 if ((level+1 < __kmp_nested_proc_bind.used)
1953 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1954 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 }
1956 }
1957
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001959 master_th->th.th_set_proc_bind = proc_bind_default;
1960#endif /* OMP_40_ENABLED */
1961
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001962 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001963#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001964 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001966 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001968 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 new_icvs.nproc = nthreads_icv;
1972 }
1973
1974#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001975 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001976 new_icvs.proc_bind = proc_bind_icv;
1977 }
1978#endif /* OMP_40_ENABLED */
1979
1980 /* allocate a new parallel team */
1981 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1982 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001983#if OMPT_SUPPORT
1984 ompt_parallel_id,
1985#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001986#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001987 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001989 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1990 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001991 /* allocate a new parallel team */
1992 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1993 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001994#if OMPT_SUPPORT
1995 ompt_parallel_id,
1996#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001998 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002000 &master_th->th.th_current_task->td_icvs, argc
2001 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002003 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002004
2005 /* setup the new team */
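        // KMP_CHECK_UPDATE only stores a field when the new value differs, so reusing a
        // hot team does not dirty cache lines that already hold the right values.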
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002006 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2007 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2008 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2009 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2010 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002011#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002012 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002013#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002014 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015 // TODO: parent_team->t.t_level == INT_MAX ???
2016#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002017 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002018#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002019 int new_level = parent_team->t.t_level + 1;
2020 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2021 new_level = parent_team->t.t_active_level + 1;
2022 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002023#if OMP_40_ENABLED
2024 } else {
2025 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002026 int new_level = parent_team->t.t_level;
2027 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2028 new_level = parent_team->t.t_active_level;
2029 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030 }
2031#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002032 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2033 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
2034 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002035
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002036#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002037 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002038#endif
2039
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002040 // Update the floating point rounding in the team if required.
2041 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042
2043 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002044 // Set master's task team to the team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002045#if 0
2046 // Patch out an assertion that trips while the runtime seems to operate correctly.
2047 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002048 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002049#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002050 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002051 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002052 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002053
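        // Save the master's th_task_state on its memo stack (growing the stack if
        // necessary) so that the state can be restored when this nested team is joined.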
2054 if ( level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002055 // Take a memo of master's task_state
2056 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2057 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002058 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2059 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002060 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002061 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002062 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2063 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2064 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002065 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2066 new_stack[i] = 0;
2067 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002068 old_stack = master_th->th.th_task_state_memo_stack;
2069 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002070 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002071 __kmp_free(old_stack);
2072 }
2073 // Store master's task_state on stack
2074 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2075 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002076#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002077 if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team
2078 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2079 }
2080 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002081#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002082 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002083#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002084 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002085#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002086 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002087#if !KMP_NESTED_HOT_TEAMS
2088 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2089#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002090 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091
2092 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2093 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2094 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2095 ( team->t.t_master_tid == 0 &&
2096 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2097 KMP_MB();
2098
2099 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002100 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002101#if OMP_40_ENABLED
2102 if ( ap ) {
2103#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002104 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002105// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002106#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002107 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002108#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002109 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002110#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002111 KMP_CHECK_UPDATE(*argv, new_argv);
2112 argv++;
2113 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114#if OMP_40_ENABLED
2115 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002116 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002117 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002118 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2119 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120 }
2121#endif /* OMP_40_ENABLED */
2122
2123 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002124 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002125 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2126 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002127
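    // Attach worker threads to the new team and stage the master's ICVs to be
    // copied to the workers at the fork barrier.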
2128 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002129 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002130
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002131#if OMPT_SUPPORT
2132 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2133#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134
2135 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2136
Jim Cownie5e8470a2013-09-27 10:38:44 +00002137#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002138 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002139# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002140 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002141# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002142 ) {
2143#if USE_ITT_NOTIFY
2144 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2145 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002146 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002147 kmp_uint64 tmp_time = 0;
2148 if ( __itt_get_timestamp_ptr )
2149 tmp_time = __itt_get_timestamp();
2150 // Internal fork - report frame begin
2151 master_th->th.th_frame_time = tmp_time;
2152 if ( __kmp_forkjoin_frames_mode == 3 )
2153 team->t.t_region_time = tmp_time;
2154 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2155#endif /* USE_ITT_NOTIFY */
2156 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2157 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2158 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002159 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2160 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002161 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002162#endif /* USE_ITT_BUILD */
2163
2164 /* now go on and do the work */
2165 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2166 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002167 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2168 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002169
2170#if USE_ITT_BUILD
2171 if ( __itt_stack_caller_create_ptr ) {
2172 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2173 }
2174#endif /* USE_ITT_BUILD */
2175
2176#if OMP_40_ENABLED
2177 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
2178#endif /* OMP_40_ENABLED */
2179 {
2180 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002181 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2182 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002183 }
2184
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002185 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002186 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2187 return TRUE;
2188 }
2189
2190 /* Invoke microtask for MASTER thread */
2191 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2192 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002193 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002194
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002195 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002196 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2197 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00002198 // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002199 if (! team->t.t_invoke( gtid )) {
2200 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2201 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002202 }
2203 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2204 gtid, team->t.t_id, team->t.t_pkfn ) );
2205 KMP_MB(); /* Flush all pending memory write invalidates. */
2206
2207 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2208
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002209#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002210 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002211 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2212 }
2213#endif
2214
Jim Cownie5e8470a2013-09-27 10:38:44 +00002215 return TRUE;
2216}
2217
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002218#if OMPT_SUPPORT
2219static inline void
2220__kmp_join_restore_state(
2221 kmp_info_t *thread,
2222 kmp_team_t *team)
2223{
2224 // restore state outside the region
2225 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2226 ompt_state_work_serial : ompt_state_work_parallel);
2227}
2228
2229static inline void
2230__kmp_join_ompt(
2231 kmp_info_t *thread,
2232 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002233 ompt_parallel_id_t parallel_id,
2234 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002235{
2236 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
2237 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2238 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002239 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002240 }
2241
2242 __kmp_join_restore_state(thread,team);
2243}
2244#endif
2245
Jim Cownie5e8470a2013-09-27 10:38:44 +00002246void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002247__kmp_join_call(ident_t *loc, int gtid
2248#if OMPT_SUPPORT
2249 , enum fork_context_e fork_context
2250#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002251#if OMP_40_ENABLED
2252 , int exit_teams
2253#endif /* OMP_40_ENABLED */
2254)
2255{
Jonathan Peyton45be4502015-08-11 21:36:41 +00002256 KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257 kmp_team_t *team;
2258 kmp_team_t *parent_team;
2259 kmp_info_t *master_th;
2260 kmp_root_t *root;
2261 int master_active;
2262 int i;
2263
2264 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2265
2266 /* setup current data */
2267 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002268 root = master_th->th.th_root;
2269 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002270 parent_team = team->t.t_parent;
2271
2272 master_th->th.th_ident = loc;
2273
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002274#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002275 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002276 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2277 }
2278#endif
2279
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002280#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00002281 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2282 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2283 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002284 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2285 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002286 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002287#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002288
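    // Serialized region: there are no worker threads to join. Undo the
    // serialized-parallel bookkeeping (with extra care inside a teams construct)
    // and return.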
2289 if( team->t.t_serialized ) {
2290#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002291 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292 // We are in teams construct
2293 int level = team->t.t_level;
2294 int tlevel = master_th->th.th_teams_level;
2295 if ( level == tlevel ) {
2296 // AC: we haven't incremented it earlier at start of teams construct,
2297 // so do it here - at the end of teams construct
2298 team->t.t_level++;
2299 } else if ( level == tlevel + 1 ) {
2300 // AC: we are exiting parallel inside teams, need to increment serialization
2301 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2302 team->t.t_serialized++;
2303 }
2304 }
2305#endif /* OMP_40_ENABLED */
2306 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002307
2308#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002309 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002310 __kmp_join_restore_state(master_th, parent_team);
2311 }
2312#endif
2313
Jim Cownie5e8470a2013-09-27 10:38:44 +00002314 return;
2315 }
2316
2317 master_active = team->t.t_master_active;
2318
2319#if OMP_40_ENABLED
2320 if (!exit_teams)
2321#endif /* OMP_40_ENABLED */
2322 {
2323 // AC: No barrier for internal teams at exit from teams construct.
2324 // But there is barrier for external team (league).
2325 __kmp_internal_join( loc, gtid, team );
2326 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002327#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002328 else {
2329 master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
2330 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002331#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002332
Jim Cownie5e8470a2013-09-27 10:38:44 +00002333 KMP_MB();
2334
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002335#if OMPT_SUPPORT
2336 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2337#endif
2338
Jim Cownie5e8470a2013-09-27 10:38:44 +00002339#if USE_ITT_BUILD
2340 if ( __itt_stack_caller_create_ptr ) {
2341 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2342 }
2343
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002344 // Mark end of "parallel" region for VTune.
2345 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002346# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002347 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002348# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002349 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002350 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002351 // only one notification scheme (either "submit" or "forking/joined", not both)
2352 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2353 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2354 0, loc, master_th->th.th_team_nproc, 1 );
2355 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2356 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2357 __kmp_itt_region_joined( gtid );
2358 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002359#endif /* USE_ITT_BUILD */
2360
2361#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002362 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002363 !exit_teams &&
2364 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2365 team->t.t_level == master_th->th.th_teams_level + 1 ) {
2366 // AC: We need to leave the team structure intact at the end of a
2367 // parallel region inside the teams construct, so that the same (hot)
2368 // team can be reused by the next parallel; only adjust the nesting levels
2369
2370 /* Decrement our nested depth level */
2371 team->t.t_level --;
2372 team->t.t_active_level --;
2373 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2374
2375 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002376 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002378 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002379 kmp_info_t **other_threads = team->t.t_threads;
2380 team->t.t_nproc = new_num;
2381 for ( i = 0; i < old_num; ++i ) {
2382 other_threads[i]->th.th_team_nproc = new_num;
2383 }
2384 // Adjust the state of the team's unused threads
2385 for ( i = old_num; i < new_num; ++i ) {
2386 // Re-initialize thread's barrier data.
2387 int b;
2388 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002389 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002390 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002391 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002392#if USE_DEBUGGER
2393 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2394#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002395 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002396 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2397 // Synchronize thread's task state
2398 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2399 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002400 }
2401 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002402
2403#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002404 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002405 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002406 }
2407#endif
2408
Jim Cownie5e8470a2013-09-27 10:38:44 +00002409 return;
2410 }
2411#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002412
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002413 /* do cleanup and restore the parent team */
2414 master_th->th.th_info .ds.ds_tid = team->t.t_master_tid;
2415 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2416
2417 master_th->th.th_dispatch =
2418 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002419
2420 /* jc: The following lock has instructions with REL and ACQ semantics,
2421 separating the parallel user code called in this parallel region
2422 from the serial user code called after this function returns.
2423 */
2424 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2425
2426#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002427 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002428#endif /* OMP_40_ENABLED */
2429 {
2430 /* Decrement our nested depth level */
2431 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2432 }
2433 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2434
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002435#if OMPT_SUPPORT && OMPT_TRACE
2436 if (ompt_enabled) {
2437 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2438 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2439 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2440 parallel_id, task_info->task_id);
2441 }
2442 task_info->frame.exit_runtime_frame = 0;
2443 task_info->task_id = 0;
2444 }
2445#endif
2446
Jim Cownie5e8470a2013-09-27 10:38:44 +00002447 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2448 0, master_th, team ) );
2449 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002450
Alp Toker98758b02014-03-02 04:12:06 +00002451#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002452 //
2453 // Restore master thread's partition.
2454 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002455 master_th->th.th_first_place = team->t.t_first_place;
2456 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002457#endif /* OMP_40_ENABLED */
2458
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002459 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002460
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002461 if ( root->r.r_active != master_active )
2462 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002463
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002464 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002465
2466 /* this race was fun to find. make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002467 * region otherwise assertions may fail occasionally since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002468 * may be reallocated and the hierarchy appears inconsistent. it is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002469 * actually safe to run and won't cause any bugs, but will cause those
Jim Cownie5e8470a2013-09-27 10:38:44 +00002470 * assertion failures. it's only one deref&assign so might as well put this
2471 * in the critical region */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002472 master_th->th.th_team = parent_team;
2473 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2474 master_th->th.th_team_master = parent_team->t.t_threads[0];
2475 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002476
2477 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002478 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002479 parent_team != master_th->th.th_serial_team &&
2480 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002481 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2482 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002483 }
2484
Jim Cownie5e8470a2013-09-27 10:38:44 +00002485 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002486 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2487 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2488 // Remember master's state if we re-use this nested hot team
2489 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002490 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002491 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002492 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002493 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002494 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002495 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002496 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002497 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002498 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002499
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002500 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2501 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2502 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002503
2504 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2505
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002506#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002507 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002508 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002509 }
2510#endif
2511
Jim Cownie5e8470a2013-09-27 10:38:44 +00002512 KMP_MB();
2513 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2514}
2515
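// The task-state restore a few lines above pops a small per-master "memo" stack.
// The sketch below is a minimal, self-contained restatement of that pop-and-restore
// step; the struct and names are illustrative only (they are not the runtime's
// kmp_info_t fields), and the block is guarded out of the build.
#if 0 /* illustrative sketch only -- not compiled */
typedef struct {
    unsigned char stack[ 8 ];   /* saved task states, one slot per nesting level */
    int           top;          /* index of the current nesting level */
    unsigned char state;        /* the live task state */
} task_state_memo_t;

/* Mirror of the pop in the join path above: remember the state being left
   (so a re-used nested hot team can pick it up), then restore the parent level's state. */
static void task_state_memo_pop( task_state_memo_t * m )
{
    m->stack[ m->top ] = m->state;   /* memo the state of the level we are leaving */
    --m->top;                        /* pop one nesting level */
    m->state = m->stack[ m->top ];   /* restore the state at the enclosing level */
}
#endif
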
2516/* ------------------------------------------------------------------------ */
2517/* ------------------------------------------------------------------------ */
2518
2519/* Check whether we should push an internal control record onto the
2520 serial team stack. If so, do it. */
2521void
2522__kmp_save_internal_controls ( kmp_info_t * thread )
2523{
2524
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002525 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002526 return;
2527 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002528 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002529 int push = 0;
2530
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002531 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002532 push = 1;
2533 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002534 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2535 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002536 push = 1;
2537 }
2538 }
2539 if (push) { /* push a record on the serial team's stack */
2540 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2541
Jim Cownie5e8470a2013-09-27 10:38:44 +00002542 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002543
2544 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2545
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002546 control->next = thread->th.th_team->t.t_control_stack_top;
2547 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002548 }
2549 }
2550}
2551
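// A user-level illustration of why the stack above exists (a hedged sketch using only
// the standard OpenMP API; it is user code, not part of this file, and is guarded out
// of the build): an nthreads-var change made inside a serialized nested region should
// not leak into the enclosing level, so the runtime snapshots the current ICVs first.
#if 0 /* illustrative user-level sketch only -- not compiled */
#include <omp.h>
#include <stdio.h>

int main( void )
{
    omp_set_num_threads( 8 );                  /* ICV at the outer level */
    #pragma omp parallel num_threads( 1 )      /* serialized team */
    {
        #pragma omp parallel num_threads( 1 )  /* t_serialized > 1 here */
        {
            omp_set_num_threads( 2 );          /* may push a control record (see above) */
        }
        /* Back at this level, the saved ICVs can be restored when the region ends. */
        printf( "nthreads-var at this level: %d\n", omp_get_max_threads() );
    }
    return 0;
}
#endif
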
2552/* Changes set_nproc */
2553void
2554__kmp_set_num_threads( int new_nth, int gtid )
2555{
2556 kmp_info_t *thread;
2557 kmp_root_t *root;
2558
2559 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2560 KMP_DEBUG_ASSERT( __kmp_init_serial );
2561
2562 if (new_nth < 1)
2563 new_nth = 1;
2564 else if (new_nth > __kmp_max_nth)
2565 new_nth = __kmp_max_nth;
2566
Jonathan Peyton45be4502015-08-11 21:36:41 +00002567 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002568 thread = __kmp_threads[gtid];
2569
2570 __kmp_save_internal_controls( thread );
2571
2572 set__nproc( thread, new_nth );
2573
2574 //
2575 // If this omp_set_num_threads() call will cause the hot team size to be
2576 // reduced (in the absence of a num_threads clause), then reduce it now,
2577 // rather than waiting for the next parallel region.
2578 //
2579 root = thread->th.th_root;
2580 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002581 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2582#if KMP_NESTED_HOT_TEAMS
2583 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2584#endif
2585 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002586 kmp_team_t *hot_team = root->r.r_hot_team;
2587 int f;
2588
2589 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2590
Jim Cownie5e8470a2013-09-27 10:38:44 +00002591 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2593 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002594 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2595 // When decreasing team size, threads no longer in the team should unref task team.
2596 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2597 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002598 __kmp_free_thread( hot_team->t.t_threads[f] );
2599 hot_team->t.t_threads[f] = NULL;
2600 }
2601 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002602#if KMP_NESTED_HOT_TEAMS
2603 if( thread->th.th_hot_teams ) {
2604 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2605 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2606 }
2607#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002608
Jim Cownie5e8470a2013-09-27 10:38:44 +00002609 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2610
2611 //
2612 // Update the t_nproc field in the threads that are still active.
2613 //
2614 for( f=0 ; f < new_nth; f++ ) {
2615 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2616 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2617 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618 // Special flag in case omp_set_num_threads() call
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002619 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002620 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621}
2622
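// A user-level view of the eager hot-team trimming above (hedged sketch; it assumes
// omp_set_num_threads() reaches __kmp_set_num_threads() through the public entry point,
// and it is guarded out of the build): if the hot team is larger than the new value and
// the root is not active, surplus workers are released here rather than at the next fork.
#if 0 /* illustrative user-level sketch only -- not compiled */
#include <omp.h>

int main( void )
{
    #pragma omp parallel          /* hot team is created at the default size */
    { }
    omp_set_num_threads( 2 );     /* surplus hot-team workers can be freed right here */
    #pragma omp parallel          /* the next region reuses the smaller hot team */
    { }
    return 0;
}
#endif
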
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623/* Changes max_active_levels */
2624void
2625__kmp_set_max_active_levels( int gtid, int max_active_levels )
2626{
2627 kmp_info_t *thread;
2628
2629 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2630 KMP_DEBUG_ASSERT( __kmp_init_serial );
2631
2632 // validate max_active_levels
2633 if( max_active_levels < 0 ) {
2634 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2635 // We ignore this call if the user has specified a negative value.
2636 // The current setting won't be changed. The last valid setting will be used.
2637 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2638 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2639 return;
2640 }
2641 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2642 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2643 // We allow a zero value. (implementation defined behavior)
2644 } else {
2645 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2646 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2647 // Current upper limit is MAX_INT. (implementation defined behavior)
2648 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
 2649 // Actually, the flow should never reach this point as long as the upper limit stays at MAX_INT.
2650 }
2651 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2652
2653 thread = __kmp_threads[ gtid ];
2654
2655 __kmp_save_internal_controls( thread );
2656
2657 set__max_active_levels( thread, max_active_levels );
2658
2659}
2660
2661/* Gets max_active_levels */
2662int
2663__kmp_get_max_active_levels( int gtid )
2664{
2665 kmp_info_t *thread;
2666
2667 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2668 KMP_DEBUG_ASSERT( __kmp_init_serial );
2669
2670 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002671 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002672 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002673 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2674 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675}
2676
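// A small user-level sketch of the validation rules above (hedged; standard API only,
// guarded out of the build): negative values are ignored with a warning, values above
// KMP_MAX_ACTIVE_LEVELS_LIMIT are clipped, and the getter returns the last accepted value.
#if 0 /* illustrative user-level sketch only -- not compiled */
#include <omp.h>
#include <stdio.h>

int main( void )
{
    omp_set_max_active_levels( 4 );    /* accepted and stored in the ICV */
    omp_set_max_active_levels( -1 );   /* ignored with a warning; previous value kept */
    printf( "max-active-levels-var: %d\n", omp_get_max_active_levels() );  /* prints 4 */
    return 0;
}
#endif
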
2677/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2678void
2679__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2680{
2681 kmp_info_t *thread;
2682// kmp_team_t *team;
2683
2684 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2685 KMP_DEBUG_ASSERT( __kmp_init_serial );
2686
2687 // Check if the kind parameter is valid, correct if needed.
2688 // Valid parameters should fit in one of two intervals - standard or extended:
2689 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2690 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2691 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2692 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2693 {
2694 // TODO: Hint needs attention in case we change the default schedule.
2695 __kmp_msg(
2696 kmp_ms_warning,
2697 KMP_MSG( ScheduleKindOutOfRange, kind ),
2698 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2699 __kmp_msg_null
2700 );
2701 kind = kmp_sched_default;
2702 chunk = 0; // ignore chunk value in case of bad kind
2703 }
2704
2705 thread = __kmp_threads[ gtid ];
2706
2707 __kmp_save_internal_controls( thread );
2708
2709 if ( kind < kmp_sched_upper_std ) {
2710 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
 2711 // differentiate static chunked vs. unchunked:
2712 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002713 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002714 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002715 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002716 }
2717 } else {
2718 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002719 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2721 }
2722 if ( kind == kmp_sched_auto ) {
2723 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002724 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002725 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002726 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002727 }
2728}
2729
2730/* Gets def_sched_var ICV values */
2731void
2732__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2733{
2734 kmp_info_t *thread;
2735 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002736
2737 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2738 KMP_DEBUG_ASSERT( __kmp_init_serial );
2739
2740 thread = __kmp_threads[ gtid ];
2741
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002742 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002743
2744 switch ( th_type ) {
2745 case kmp_sch_static:
2746 case kmp_sch_static_greedy:
2747 case kmp_sch_static_balanced:
2748 *kind = kmp_sched_static;
2749 *chunk = 0; // chunk was not set, try to show this fact via zero value
2750 return;
2751 case kmp_sch_static_chunked:
2752 *kind = kmp_sched_static;
2753 break;
2754 case kmp_sch_dynamic_chunked:
2755 *kind = kmp_sched_dynamic;
2756 break;
2757 case kmp_sch_guided_chunked:
2758 case kmp_sch_guided_iterative_chunked:
2759 case kmp_sch_guided_analytical_chunked:
2760 *kind = kmp_sched_guided;
2761 break;
2762 case kmp_sch_auto:
2763 *kind = kmp_sched_auto;
2764 break;
2765 case kmp_sch_trapezoidal:
2766 *kind = kmp_sched_trapezoidal;
2767 break;
2768/*
2769 case kmp_sch_static_steal:
2770 *kind = kmp_sched_static_steal;
2771 break;
2772*/
2773 default:
2774 KMP_FATAL( UnknownSchedulingType, th_type );
2775 }
2776
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002777 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002778}
2779
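// The kind/chunk translation in the two routines above is what backs omp_set_schedule()
// and omp_get_schedule(). A hedged user-level sketch (standard API only, guarded out of
// the build); note the convention visible above that a non-positive chunk with a static
// kind selects the unchunked variant and is reported back as chunk 0.
#if 0 /* illustrative user-level sketch only -- not compiled */
#include <omp.h>
#include <stdio.h>

int main( void )
{
    omp_sched_t kind;
    int         chunk;

    omp_set_schedule( omp_sched_static, 0 );    /* unchunked static */
    omp_get_schedule( &kind, &chunk );
    printf( "kind=%d chunk=%d\n", (int)kind, chunk );   /* chunk reported as 0 */

    omp_set_schedule( omp_sched_dynamic, 7 );   /* dynamic, chunk 7 */
    omp_get_schedule( &kind, &chunk );
    printf( "kind=%d chunk=%d\n", (int)kind, chunk );
    return 0;
}
#endif
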
2780int
2781__kmp_get_ancestor_thread_num( int gtid, int level ) {
2782
2783 int ii, dd;
2784 kmp_team_t *team;
2785 kmp_info_t *thr;
2786
2787 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2788 KMP_DEBUG_ASSERT( __kmp_init_serial );
2789
2790 // validate level
2791 if( level == 0 ) return 0;
2792 if( level < 0 ) return -1;
2793 thr = __kmp_threads[ gtid ];
2794 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002795 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002796 if( level > ii ) return -1;
2797
2798#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002799 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002800 // AC: we are in teams region where multiple nested teams have same level
2801 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2802 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2803 KMP_DEBUG_ASSERT( ii >= tlevel );
2804 // AC: As we need to pass by the teams league, we need to artificially increase ii
2805 if ( ii == tlevel ) {
2806 ii += 2; // three teams have same level
2807 } else {
2808 ii ++; // two teams have same level
2809 }
2810 }
2811 }
2812#endif
2813
2814 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2815
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002816 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002817 level++;
2818 while( ii > level )
2819 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002820 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002821 {
2822 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002823 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002824 team = team->t.t_parent;
2825 continue;
2826 }
2827 if( ii > level ) {
2828 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002829 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002830 ii--;
2831 }
2832 }
2833
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002834 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002835}
2836
2837int
2838__kmp_get_team_size( int gtid, int level ) {
2839
2840 int ii, dd;
2841 kmp_team_t *team;
2842 kmp_info_t *thr;
2843
2844 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2845 KMP_DEBUG_ASSERT( __kmp_init_serial );
2846
2847 // validate level
2848 if( level == 0 ) return 1;
2849 if( level < 0 ) return -1;
2850 thr = __kmp_threads[ gtid ];
2851 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002852 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002853 if( level > ii ) return -1;
2854
2855#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002856 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002857 // AC: we are in teams region where multiple nested teams have same level
2858 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2859 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2860 KMP_DEBUG_ASSERT( ii >= tlevel );
2861 // AC: As we need to pass by the teams league, we need to artificially increase ii
2862 if ( ii == tlevel ) {
2863 ii += 2; // three teams have same level
2864 } else {
2865 ii ++; // two teams have same level
2866 }
2867 }
2868 }
2869#endif
2870
2871 while( ii > level )
2872 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002873 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002874 {
2875 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002876 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002877 team = team->t.t_parent;
2878 continue;
2879 }
2880 if( ii > level ) {
2881 team = team->t.t_parent;
2882 ii--;
2883 }
2884 }
2885
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002886 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002887}
2888
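// Both routines above walk the same chain of t_parent teams while skipping serialized
// levels; they back omp_get_ancestor_thread_num() and omp_get_team_size(). A hedged
// user-level sketch (standard API only, guarded out of the build; the team sizes shown
// in the comments assume the requested numbers of threads are actually granted):
#if 0 /* illustrative user-level sketch only -- not compiled */
#include <omp.h>
#include <stdio.h>

int main( void )
{
    omp_set_nested( 1 );
    #pragma omp parallel num_threads( 2 )
    {
        #pragma omp parallel num_threads( 3 )
        {
            #pragma omp single
            printf( "level=%d ancestor(1)=%d size(1)=%d size(2)=%d\n",
                    omp_get_level(),
                    omp_get_ancestor_thread_num( 1 ),  /* outer-level id of our ancestor */
                    omp_get_team_size( 1 ),            /* size of the outer team (2)     */
                    omp_get_team_size( 2 ) );          /* size of the current team (3)   */
        }
    }
    return 0;
}
#endif
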
Jim Cownie5e8470a2013-09-27 10:38:44 +00002889kmp_r_sched_t
2890__kmp_get_schedule_global() {
 2891// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
 2892// may be changed by kmp_set_defaults independently, so the updated schedule can be obtained here.
2893
2894 kmp_r_sched_t r_sched;
2895
2896 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2897 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2898 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2899 if ( __kmp_sched == kmp_sch_static ) {
2900 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2901 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2902 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2903 } else {
2904 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2905 }
2906
2907 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2908 r_sched.chunk = KMP_DEFAULT_CHUNK;
2909 } else {
2910 r_sched.chunk = __kmp_chunk;
2911 }
2912
2913 return r_sched;
2914}
2915
2916/* ------------------------------------------------------------------------ */
2917/* ------------------------------------------------------------------------ */
2918
2919
2920/*
2921 * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
2922 * at least argc number of *t_argv entries for the requested team.
2923 */
2924static void
2925__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2926{
2927
2928 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002929 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002930
2931 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2932 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002933 /* if previously allocated heap space for args, free them */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002934 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2935 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002936
2937 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2938 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002939 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002940 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2941 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002942 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002943 if ( __kmp_storage_map ) {
2944 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2945 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2946 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2947 "team_%d.t_inline_argv",
2948 team->t.t_id );
2949 }
2950 } else {
2951 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002952 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002953 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2954 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2955 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002956 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002957 if ( __kmp_storage_map ) {
2958 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2959 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2960 team->t.t_id );
2961 }
2962 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002963 }
2964}
2965
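// A minimal restatement of the sizing rule above (the helper name is illustrative and
// the block is guarded out of the build): small argument lists reuse the team's inline
// cache-line space; larger ones get a heap buffer of at least KMP_MIN_MALLOC_ARGV_ENTRIES,
// otherwise twice argc.
#if 0 /* illustrative sketch only -- not compiled */
/* Returns the t_max_argc value the routine above would choose. */
static int example_argv_capacity( int argc, int inline_entries, int min_malloc_entries )
{
    if ( argc <= inline_entries )
        return inline_entries;                      /* inline storage is sufficient */
    return ( argc <= ( min_malloc_entries >> 1 ) ) ?
           min_malloc_entries : 2 * argc;           /* heap-allocated argv */
}
#endif
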
2966static void
2967__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2968{
2969 int i;
2970 int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002971 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2972 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002973 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002974 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002975 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002976 team->t.t_max_nproc = max_nth;
2977
2978 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002979 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002980 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00002981#if OMP_41_ENABLED
2982 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2983#endif
2984 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002985}
2986
2987static void
2988__kmp_free_team_arrays(kmp_team_t *team) {
2989 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2990 int i;
2991 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2992 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2993 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2994 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2995 }; // if
2996 }; // for
2997 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00002998 __kmp_free(team->t.t_disp_buffer);
2999 __kmp_free(team->t.t_dispatch);
3000 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003001 team->t.t_threads = NULL;
3002 team->t.t_disp_buffer = NULL;
3003 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003004 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003005}
3006
3007static void
3008__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3009 kmp_info_t **oldThreads = team->t.t_threads;
3010
Jonathan Peytona58563d2016-03-29 20:05:27 +00003011 __kmp_free(team->t.t_disp_buffer);
3012 __kmp_free(team->t.t_dispatch);
3013 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003014 __kmp_allocate_team_arrays(team, max_nth);
3015
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003016 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003017
3018 __kmp_free(oldThreads);
3019}
3020
3021static kmp_internal_control_t
3022__kmp_get_global_icvs( void ) {
3023
Jim Cownie5e8470a2013-09-27 10:38:44 +00003024 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003025
3026#if OMP_40_ENABLED
3027 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3028#endif /* OMP_40_ENABLED */
3029
3030 kmp_internal_control_t g_icvs = {
3031 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003032 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3033 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3034 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003035 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
3036 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003037 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3038 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003039 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3040 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003041#if OMP_40_ENABLED
3042 __kmp_nested_proc_bind.bind_types[0],
3043#endif /* OMP_40_ENABLED */
3044 NULL //struct kmp_internal_control *next;
3045 };
3046
3047 return g_icvs;
3048}
3049
3050static kmp_internal_control_t
3051__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3052
Jim Cownie5e8470a2013-09-27 10:38:44 +00003053 kmp_internal_control_t gx_icvs;
3054 gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
3055 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3056 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003057
3058 return gx_icvs;
3059}
3060
3061static void
3062__kmp_initialize_root( kmp_root_t *root )
3063{
3064 int f;
3065 kmp_team_t *root_team;
3066 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003067 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003068 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3069 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003070 KMP_DEBUG_ASSERT( root );
3071 KMP_ASSERT( ! root->r.r_begin );
3072
3073 /* setup the root state structure */
3074 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003075 root->r.r_begin = FALSE;
3076 root->r.r_active = FALSE;
3077 root->r.r_in_parallel = 0;
3078 root->r.r_blocktime = __kmp_dflt_blocktime;
3079 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003080
3081 /* setup the root team for this task */
3082 /* allocate the root team structure */
3083 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003084
Jim Cownie5e8470a2013-09-27 10:38:44 +00003085 root_team =
3086 __kmp_allocate_team(
3087 root,
3088 1, // new_nproc
3089 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003090#if OMPT_SUPPORT
3091 0, // root parallel id
3092#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003093#if OMP_40_ENABLED
3094 __kmp_nested_proc_bind.bind_types[0],
3095#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003096 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003097 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003098 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003099 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003100#if USE_DEBUGGER
3101 // Non-NULL value should be assigned to make the debugger display the root team.
3102 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3103#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003104
3105 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3106
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003107 root->r.r_root_team = root_team;
3108 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003109
3110 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003111 root_team->t.t_threads[0] = NULL;
3112 root_team->t.t_nproc = 1;
3113 root_team->t.t_serialized = 1;
3114 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3115 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3116 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003117 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3118 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3119
3120 /* setup the hot team for this task */
3121 /* allocate the hot team structure */
3122 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003123
Jim Cownie5e8470a2013-09-27 10:38:44 +00003124 hot_team =
3125 __kmp_allocate_team(
3126 root,
3127 1, // new_nproc
3128 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003129#if OMPT_SUPPORT
3130 0, // root parallel id
3131#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003132#if OMP_40_ENABLED
3133 __kmp_nested_proc_bind.bind_types[0],
3134#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003135 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003136 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003137 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003138 );
3139 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3140
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003141 root->r.r_hot_team = hot_team;
3142 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143
3144 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003145 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003146
3147 /* initialize hot team */
3148 hot_team_max_nth = hot_team->t.t_max_nproc;
3149 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003150 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003151 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003152 hot_team->t.t_nproc = 1;
3153 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3154 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3155 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003156 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003157}
3158
3159#ifdef KMP_DEBUG
3160
3161
3162typedef struct kmp_team_list_item {
3163 kmp_team_p const * entry;
3164 struct kmp_team_list_item * next;
3165} kmp_team_list_item_t;
3166typedef kmp_team_list_item_t * kmp_team_list_t;
3167
3168
3169static void
3170__kmp_print_structure_team_accum( // Add team to list of teams.
3171 kmp_team_list_t list, // List of teams.
3172 kmp_team_p const * team // Team to add.
3173) {
3174
3175 // List must terminate with item where both entry and next are NULL.
3176 // Team is added to the list only once.
3177 // List is sorted in ascending order by team id.
3178 // Team id is *not* a key.
3179
3180 kmp_team_list_t l;
3181
3182 KMP_DEBUG_ASSERT( list != NULL );
3183 if ( team == NULL ) {
3184 return;
3185 }; // if
3186
3187 __kmp_print_structure_team_accum( list, team->t.t_parent );
3188 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3189
3190 // Search list for the team.
3191 l = list;
3192 while ( l->next != NULL && l->entry != team ) {
3193 l = l->next;
3194 }; // while
3195 if ( l->next != NULL ) {
3196 return; // Team has been added before, exit.
3197 }; // if
3198
3199 // Team is not found. Search list again for insertion point.
3200 l = list;
3201 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3202 l = l->next;
3203 }; // while
3204
3205 // Insert team.
3206 {
3207 kmp_team_list_item_t * item =
3208 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3209 * item = * l;
3210 l->entry = team;
3211 l->next = item;
3212 }
3213
3214}
3215
3216static void
3217__kmp_print_structure_team(
3218 char const * title,
3219 kmp_team_p const * team
3220
3221) {
3222 __kmp_printf( "%s", title );
3223 if ( team != NULL ) {
3224 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3225 } else {
3226 __kmp_printf( " - (nil)\n" );
3227 }; // if
3228}
3229
3230static void
3231__kmp_print_structure_thread(
3232 char const * title,
3233 kmp_info_p const * thread
3234
3235) {
3236 __kmp_printf( "%s", title );
3237 if ( thread != NULL ) {
3238 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3239 } else {
3240 __kmp_printf( " - (nil)\n" );
3241 }; // if
3242}
3243
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003244void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003245__kmp_print_structure(
3246 void
3247) {
3248
3249 kmp_team_list_t list;
3250
3251 // Initialize list of teams.
3252 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3253 list->entry = NULL;
3254 list->next = NULL;
3255
3256 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3257 {
3258 int gtid;
3259 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3260 __kmp_printf( "%2d", gtid );
3261 if ( __kmp_threads != NULL ) {
3262 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3263 }; // if
3264 if ( __kmp_root != NULL ) {
3265 __kmp_printf( " %p", __kmp_root[ gtid ] );
3266 }; // if
3267 __kmp_printf( "\n" );
3268 }; // for gtid
3269 }
3270
3271 // Print out __kmp_threads array.
3272 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3273 if ( __kmp_threads != NULL ) {
3274 int gtid;
3275 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3276 kmp_info_t const * thread = __kmp_threads[ gtid ];
3277 if ( thread != NULL ) {
3278 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3279 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3280 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3281 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3282 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3283 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3284 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3285 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3286#if OMP_40_ENABLED
3287 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3288#endif
3289 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3290 __kmp_printf( "\n" );
3291 __kmp_print_structure_team_accum( list, thread->th.th_team );
3292 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3293 }; // if
3294 }; // for gtid
3295 } else {
3296 __kmp_printf( "Threads array is not allocated.\n" );
3297 }; // if
3298
3299 // Print out __kmp_root array.
3300 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3301 if ( __kmp_root != NULL ) {
3302 int gtid;
3303 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3304 kmp_root_t const * root = __kmp_root[ gtid ];
3305 if ( root != NULL ) {
3306 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3307 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3308 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3309 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3310 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3311 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3312 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3313 __kmp_printf( "\n" );
3314 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3315 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3316 }; // if
3317 }; // for gtid
3318 } else {
3319 __kmp_printf( "Ubers array is not allocated.\n" );
3320 }; // if
3321
3322 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3323 while ( list->next != NULL ) {
3324 kmp_team_p const * team = list->entry;
3325 int i;
3326 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3327 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3328 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3329 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3330 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3331 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3332 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3333 __kmp_printf( " Thread %2d: ", i );
3334 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3335 }; // for i
3336 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3337 __kmp_printf( "\n" );
3338 list = list->next;
3339 }; // while
3340
3341 // Print out __kmp_thread_pool and __kmp_team_pool.
3342 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3343 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3344 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3345 __kmp_printf( "\n" );
3346
3347 // Free team list.
3348 while ( list != NULL ) {
3349 kmp_team_list_item_t * item = list;
3350 list = list->next;
3351 KMP_INTERNAL_FREE( item );
3352 }; // while
3353
3354}
3355
3356#endif
3357
3358
3359//---------------------------------------------------------------------------
3360// Stuff for per-thread fast random number generator
3361// Table of primes
3362
3363static const unsigned __kmp_primes[] = {
3364 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3365 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3366 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3367 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3368 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3369 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3370 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3371 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3372 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3373 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3374 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3375 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3376 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3377 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3378 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3379 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3380};
3381
3382//---------------------------------------------------------------------------
3383// __kmp_get_random: Get a random number using a linear congruential method.
3384
3385unsigned short
3386__kmp_get_random( kmp_info_t * thread )
3387{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003388 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003389 unsigned short r = x>>16;
3390
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003391 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003392
3393 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3394 thread->th.th_info.ds.ds_tid, r) );
3395
3396 return r;
3397}
3398//--------------------------------------------------------
3399// __kmp_init_random: Initialize a random number generator
3400
3401void
3402__kmp_init_random( kmp_info_t * thread )
3403{
3404 unsigned seed = thread->th.th_info.ds.ds_tid;
3405
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003406 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3407 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3408 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003409}
3410
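// A stand-alone restatement of the generator above (the struct is illustrative, not the
// runtime's kmp_info_t, and the block is guarded out of the build): a 32-bit linear
// congruential step x = a*x + 1 per thread, with a per-thread multiplier taken from the
// primes table; only the high 16 bits are returned, presumably because the low-order
// bits of a power-of-two-modulus LCG are weak.
#if 0 /* illustrative sketch only -- not compiled */
typedef struct {
    unsigned x;    /* current state, advanced on every call   */
    unsigned a;    /* per-thread multiplier (see __kmp_primes) */
} example_lcg_t;

static unsigned short example_lcg_next( example_lcg_t * g )
{
    unsigned short r = (unsigned short)( g->x >> 16 );  /* high half is the result */
    g->x = g->x * g->a + 1;                             /* x_{n+1} = a * x_n + 1 (mod 2^32) */
    return r;
}
#endif
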
3411
3412#if KMP_OS_WINDOWS
3413/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3414static int
3415__kmp_reclaim_dead_roots(void) {
3416 int i, r = 0;
3417
3418 for(i = 0; i < __kmp_threads_capacity; ++i) {
3419 if( KMP_UBER_GTID( i ) &&
3420 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3421 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
3422 r += __kmp_unregister_root_other_thread(i);
3423 }
3424 }
3425 return r;
3426}
3427#endif
3428
3429/*
3430 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3431 free entries generated.
3432
3433 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3434 already dead.
3435
 3436 On all platforms, expansion is attempted on the arrays __kmp_threads and __kmp_root, with appropriate
3437 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3438 __kmp_tp_capacity, if threadprivate cache array has been created.
3439 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3440
3441 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3442 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3443 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3444 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3445 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3446 as many free slots as possible up to nWish.
3447
3448 If any argument is negative, the behavior is undefined.
3449*/
3450static int
3451__kmp_expand_threads(int nWish, int nNeed) {
3452 int added = 0;
3453 int old_tp_cached;
3454 int __kmp_actual_max_nth;
3455
3456 if(nNeed > nWish) /* normalize the arguments */
3457 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003458#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003459/* only for Windows static library */
3460 /* reclaim array entries for root threads that are already dead */
3461 added = __kmp_reclaim_dead_roots();
3462
3463 if(nNeed) {
3464 nNeed -= added;
3465 if(nNeed < 0)
3466 nNeed = 0;
3467 }
3468 if(nWish) {
3469 nWish -= added;
3470 if(nWish < 0)
3471 nWish = 0;
3472 }
3473#endif
3474 if(nWish <= 0)
3475 return added;
3476
3477 while(1) {
3478 int nTarget;
3479 int minimumRequiredCapacity;
3480 int newCapacity;
3481 kmp_info_t **newThreads;
3482 kmp_root_t **newRoot;
3483
3484 //
3485 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3486 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3487 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3488 // become > __kmp_max_nth in one of two ways:
3489 //
3490 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3491 // may not be resused by another thread, so we may need to increase
3492 // __kmp_threads_capacity to __kmp_max_threads + 1.
3493 //
3494 // 2) New foreign root(s) are encountered. We always register new
3495 // foreign roots. This may cause a smaller # of threads to be
3496 // allocated at subsequent parallel regions, but the worker threads
3497 // hang around (and eventually go to sleep) and need slots in the
3498 // __kmp_threads[] array.
3499 //
3500 // Anyway, that is the reason for moving the check to see if
 3501 // __kmp_max_threads was exceeded into __kmp_reserve_threads()
3502 // instead of having it performed here. -BB
3503 //
3504 old_tp_cached = __kmp_tp_cached;
3505 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3506 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3507
3508 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3509 nTarget = nWish;
3510 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3511 /* can't fulfil nWish, so try nNeed */
3512 if(nNeed) {
3513 nTarget = nNeed;
3514 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3515 /* possible expansion too small -- give up */
3516 break;
3517 }
3518 } else {
3519 /* best-effort */
3520 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3521 if(!nTarget) {
 3522 /* cannot expand at all -- give up */
3523 break;
3524 }
3525 }
3526 }
3527 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3528
3529 newCapacity = __kmp_threads_capacity;
3530 do{
3531 newCapacity =
3532 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3533 (newCapacity << 1) :
3534 __kmp_actual_max_nth;
3535 } while(newCapacity < minimumRequiredCapacity);
3536 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3537 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003538 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3539 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003540 memset(newThreads + __kmp_threads_capacity, 0,
3541 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3542 memset(newRoot + __kmp_threads_capacity, 0,
3543 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3544
3545 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3546 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3547 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3548 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3549 of a double-check pair.
3550 */
3551 __kmp_free(newThreads);
3552 continue; /* start over and try again */
3553 }
3554 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3555 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3556 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3557 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3558 __kmp_free(newThreads);
3559 continue; /* start over and try again */
3560 } else {
3561 /* success */
 3562 // __kmp_free( __kmp_threads ); // ATT: It leads to a crash. Needs to be investigated.
3563 //
3564 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3565 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3566 added += newCapacity - __kmp_threads_capacity;
3567 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3568 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003569 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003570 }
3571 }
3572 return added;
3573}
3574
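// A minimal restatement of the growth policy above (the helper is illustrative and
// guarded out of the build): capacity doubles until it covers the required number of
// slots, clipped to the effective maximum (__kmp_tp_capacity once a threadprivate cache
// exists, __kmp_sys_max_nth otherwise).
#if 0 /* illustrative sketch only -- not compiled */
static int example_new_capacity( int current, int required, int actual_max )
{
    int cap = current;
    do {
        cap = ( cap <= ( actual_max >> 1 ) ) ? ( cap << 1 ) : actual_max;
    } while ( cap < required );
    return cap;
}
#endif
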
3575/* register the current thread as a root thread and obtain our gtid */
3576/* we must have the __kmp_initz_lock held at this point */
3577/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
3578int
3579__kmp_register_root( int initial_thread )
3580{
3581 kmp_info_t *root_thread;
3582 kmp_root_t *root;
3583 int gtid;
3584 int capacity;
3585 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3586 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3587 KMP_MB();
3588
3589
3590 /*
3591 2007-03-02:
3592
 3593 If the initial thread has not invoked the OpenMP RTL yet, and this thread is not an initial one,
 3594 the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
 3595 return false (meaning there is at least one empty slot in the __kmp_threads array), but it
 3596 is possible that the only free slot is #0, which is reserved for the initial thread and so cannot be
 3597 used for this one. The following code works around this bug.
 3598
 3599 However, the right solution seems to be not reserving slot #0 for the initial thread, because:
 3600 (1) there is no magic in slot #0,
 3601 (2) we cannot detect the initial thread reliably (the first thread which performs serial
 3602 initialization may not be a real initial thread).
3603 */
3604 capacity = __kmp_threads_capacity;
3605 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3606 -- capacity;
3607 }; // if
3608
3609 /* see if there are too many threads */
3610 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3611 if ( __kmp_tp_cached ) {
3612 __kmp_msg(
3613 kmp_ms_fatal,
3614 KMP_MSG( CantRegisterNewThread ),
3615 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3616 KMP_HNT( PossibleSystemLimitOnThreads ),
3617 __kmp_msg_null
3618 );
3619 }
3620 else {
3621 __kmp_msg(
3622 kmp_ms_fatal,
3623 KMP_MSG( CantRegisterNewThread ),
3624 KMP_HNT( SystemLimitOnThreads ),
3625 __kmp_msg_null
3626 );
3627 }
3628 }; // if
3629
3630 /* find an available thread slot */
3631 /* Don't reassign the zero slot since we need that to only be used by initial
3632 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003633 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3634 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003635 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3636 KMP_ASSERT( gtid < __kmp_threads_capacity );
3637
3638 /* update global accounting */
3639 __kmp_all_nth ++;
3640 TCW_4(__kmp_nth, __kmp_nth + 1);
3641
3642 //
3643 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3644 // for low numbers of procs, and method #2 (keyed API call) for higher
3645 // numbers of procs.
3646 //
3647 if ( __kmp_adjust_gtid_mode ) {
3648 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3649 if ( TCR_4(__kmp_gtid_mode) != 2) {
3650 TCW_4(__kmp_gtid_mode, 2);
3651 }
3652 }
3653 else {
3654 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3655 TCW_4(__kmp_gtid_mode, 1);
3656 }
3657 }
3658 }
3659
3660#ifdef KMP_ADJUST_BLOCKTIME
3661 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003662 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003663 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3664 if ( __kmp_nth > __kmp_avail_proc ) {
3665 __kmp_zero_bt = TRUE;
3666 }
3667 }
3668#endif /* KMP_ADJUST_BLOCKTIME */
3669
3670 /* setup this new hierarchy */
3671 if( ! ( root = __kmp_root[gtid] )) {
3672 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3673 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3674 }
3675
3676 __kmp_initialize_root( root );
3677
3678 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003679 if( root->r.r_uber_thread ) {
3680 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003681 } else {
3682 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3683 if ( __kmp_storage_map ) {
3684 __kmp_print_thread_storage_map( root_thread, gtid );
3685 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003686 root_thread->th.th_info .ds.ds_gtid = gtid;
3687 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003688 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003689 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003690 }
3691 #if USE_FAST_MEMORY
3692 __kmp_initialize_fast_memory( root_thread );
3693 #endif /* USE_FAST_MEMORY */
3694
3695 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003696 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003697 __kmp_initialize_bget( root_thread );
3698 #endif
3699 __kmp_init_random( root_thread ); // Initialize random number generator
3700 }
3701
3702 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003703 if( ! root_thread->th.th_serial_team ) {
3704 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003705 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003706
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003707 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003708#if OMPT_SUPPORT
3709 0, // root parallel id
3710#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711#if OMP_40_ENABLED
3712 proc_bind_default,
3713#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003714 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003715 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003717 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003718 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003719 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003720
3721 /* drop root_thread into place */
3722 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3723
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003724 root->r.r_root_team->t.t_threads[0] = root_thread;
3725 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3726 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3727 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
3728 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003729
3730 /* initialize the thread, get it ready to go */
3731 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003732 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003733
3734 /* prepare the master thread for get_gtid() */
3735 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003736
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003737#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003738 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003739#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003740
Jim Cownie5e8470a2013-09-27 10:38:44 +00003741 #ifdef KMP_TDATA_GTID
3742 __kmp_gtid = gtid;
3743 #endif
3744 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3745 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003746
3747 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3748 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003749 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003750 KMP_INIT_BARRIER_STATE ) );
3751 { // Initialize barrier data.
3752 int b;
3753 for ( b = 0; b < bs_last_barrier; ++ b ) {
3754 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003755#if USE_DEBUGGER
3756 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3757#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003758 }; // for
3759 }
3760 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3761
Alp Toker763b9392014-02-28 09:42:41 +00003762#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003763# if OMP_40_ENABLED
3764 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3765 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3766 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3767 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3768# endif
3769
Jim Cownie5e8470a2013-09-27 10:38:44 +00003770 if ( TCR_4(__kmp_init_middle) ) {
3771 __kmp_affinity_set_init_mask( gtid, TRUE );
3772 }
Alp Toker763b9392014-02-28 09:42:41 +00003773#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003774
3775 __kmp_root_counter ++;
3776
3777 KMP_MB();
3778 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3779
3780 return gtid;
3781}
3782
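// A user-level scenario that exercises the registration path above (hedged sketch,
// standard APIs only, guarded out of the build): a thread created outside of OpenMP
// becomes a new root -- with its own uber thread, root team and hot team -- the first
// time it touches the OpenMP runtime.
#if 0 /* illustrative user-level sketch only -- not compiled */
#include <omp.h>
#include <pthread.h>
#include <stdio.h>

static void * foreign_thread( void * arg )
{
    (void) arg;
    /* First OpenMP construct on this thread: it registers itself as a new root. */
    #pragma omp parallel num_threads( 2 )
    {
        #pragma omp single
        printf( "foreign root forked a team of %d threads\n", omp_get_num_threads() );
    }
    return NULL;
}

int main( void )
{
    pthread_t t;
    pthread_create( &t, NULL, foreign_thread, NULL );
    pthread_join( t, NULL );
    return 0;
}
#endif
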
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003783#if KMP_NESTED_HOT_TEAMS
3784static int
3785__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3786{
3787 int i, n, nth;
3788 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3789 if( !hot_teams || !hot_teams[level].hot_team ) {
3790 return 0;
3791 }
3792 KMP_DEBUG_ASSERT( level < max_level );
3793 kmp_team_t *team = hot_teams[level].hot_team;
3794 nth = hot_teams[level].hot_team_nth;
3795 n = nth - 1; // master is not freed
3796 if( level < max_level - 1 ) {
3797 for( i = 0; i < nth; ++i ) {
3798 kmp_info_t *th = team->t.t_threads[i];
3799 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3800 if( i > 0 && th->th.th_hot_teams ) {
3801 __kmp_free( th->th.th_hot_teams );
3802 th->th.th_hot_teams = NULL;
3803 }
3804 }
3805 }
3806 __kmp_free_team( root, team, NULL );
3807 return n;
3808}
3809#endif
3810
Jim Cownie5e8470a2013-09-27 10:38:44 +00003811/* Resets a root thread and clears its root and hot teams.
3812 Returns the number of __kmp_threads entries directly and indirectly freed.
3813*/
3814static int
3815__kmp_reset_root(int gtid, kmp_root_t *root)
3816{
3817 kmp_team_t * root_team = root->r.r_root_team;
3818 kmp_team_t * hot_team = root->r.r_hot_team;
3819 int n = hot_team->t.t_nproc;
3820 int i;
3821
3822 KMP_DEBUG_ASSERT( ! root->r.r_active );
3823
3824 root->r.r_root_team = NULL;
3825 root->r.r_hot_team = NULL;
3826 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call
3827 // to __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003828 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3829#if KMP_NESTED_HOT_TEAMS
3830 if( __kmp_hot_teams_max_level > 1 ) { // need to free nested hot teams and their threads if any
3831 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3832 kmp_info_t *th = hot_team->t.t_threads[i];
3833 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3834 if( th->th.th_hot_teams ) {
3835 __kmp_free( th->th.th_hot_teams );
3836 th->th.th_hot_teams = NULL;
3837 }
3838 }
3839 }
3840#endif
3841 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003842
Jim Cownie5e8470a2013-09-27 10:38:44 +00003843 //
3844 // Before we can reap the thread, we need to make certain that all
    3845 // other threads in the teams that had this root as an ancestor have stopped trying to steal tasks.
3846 //
3847 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3848 __kmp_wait_to_unref_task_teams();
3849 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003850
3851 #if KMP_OS_WINDOWS
3852 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3853 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3854 (LPVOID)&(root->r.r_uber_thread->th),
3855 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3856 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3857 #endif /* KMP_OS_WINDOWS */
3858
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003859#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003860 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003861 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3862 int gtid = __kmp_get_gtid();
3863 __ompt_thread_end(ompt_thread_initial, gtid);
3864 }
3865#endif
3866
Jim Cownie5e8470a2013-09-27 10:38:44 +00003867 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3868 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3869
    3870 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3871 root->r.r_uber_thread = NULL;
3872 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003873 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003874
3875 return n;
3876}
3877
3878void
3879__kmp_unregister_root_current_thread( int gtid )
3880{
Jim Cownie77c2a632014-09-03 11:34:33 +00003881 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003882 /* this lock should be ok, since unregister_root_current_thread is never called during
    3883 * an abort, only during a normal close. furthermore, if you have the
3884 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003885
3886 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3887 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3888 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3889 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3890 return;
3891 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003892 kmp_root_t *root = __kmp_root[gtid];
3893
Jim Cownie5e8470a2013-09-27 10:38:44 +00003894 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3895 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3896 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3897 KMP_ASSERT( root->r.r_active == FALSE );
3898
Jim Cownie5e8470a2013-09-27 10:38:44 +00003899
3900 KMP_MB();
3901
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003902#if OMP_41_ENABLED
3903 kmp_info_t * thread = __kmp_threads[gtid];
3904 kmp_team_t * team = thread->th.th_team;
3905 kmp_task_team_t * task_team = thread->th.th_task_team;
3906
3907 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003908 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3909#if OMPT_SUPPORT
3910 // the runtime is shutting down so we won't report any events
3911 thread->th.ompt_thread_info.state = ompt_state_undefined;
3912#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003913 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003914 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003915#endif
3916
Jim Cownie5e8470a2013-09-27 10:38:44 +00003917 __kmp_reset_root(gtid, root);
3918
3919 /* free up this thread slot */
3920 __kmp_gtid_set_specific( KMP_GTID_DNE );
3921#ifdef KMP_TDATA_GTID
3922 __kmp_gtid = KMP_GTID_DNE;
3923#endif
3924
3925 KMP_MB();
3926 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3927
3928 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3929}
3930
Jonathan Peyton2321d572015-06-08 19:25:25 +00003931#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003932/* __kmp_forkjoin_lock must be already held
3933 Unregisters a root thread that is not the current thread. Returns the number of
3934 __kmp_threads entries freed as a result.
3935 */
3936static int
3937__kmp_unregister_root_other_thread( int gtid )
3938{
3939 kmp_root_t *root = __kmp_root[gtid];
3940 int r;
3941
3942 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3943 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3944 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3945 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3946 KMP_ASSERT( root->r.r_active == FALSE );
3947
3948 r = __kmp_reset_root(gtid, root);
3949 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3950 return r;
3951}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003952#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003953
Jim Cownie5e8470a2013-09-27 10:38:44 +00003954#if KMP_DEBUG
3955void __kmp_task_info() {
3956
3957 kmp_int32 gtid = __kmp_entry_gtid();
3958 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3959 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003960 kmp_team_t *steam = this_thr->th.th_serial_team;
3961 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003962
3963 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3964 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3965}
3966#endif // KMP_DEBUG
3967
Jim Cownie5e8470a2013-09-27 10:38:44 +00003968/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003969 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003970 * initialization of features as much as possible */
3971static void
3972__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3973{
3974 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3975 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003976 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003977 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003978 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003979 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003980 KMP_DEBUG_ASSERT( team->t.t_threads );
3981 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3982 KMP_DEBUG_ASSERT( master );
3983 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003984
3985 KMP_MB();
3986
3987 TCW_SYNC_PTR(this_thr->th.th_team, team);
3988
3989 this_thr->th.th_info.ds.ds_tid = tid;
3990 this_thr->th.th_set_nproc = 0;
3991#if OMP_40_ENABLED
3992 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00003993# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00003994 this_thr->th.th_new_place = this_thr->th.th_current_place;
3995# endif
3996#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003997 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003998
3999 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004000 this_thr->th.th_team_nproc = team->t.t_nproc;
4001 this_thr->th.th_team_master = master;
4002 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004003 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4004
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004005 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004006
4007 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4008 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4009
4010 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4011
4012 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4013 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4014 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004015
4016 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004017 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004018
4019 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004020
4021#ifdef BUILD_TV
4022 this_thr->th.th_local.tv_data = 0;
4023#endif
4024
4025 if ( ! this_thr->th.th_pri_common ) {
4026 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4027 if ( __kmp_storage_map ) {
4028 __kmp_print_storage_map_gtid(
4029 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4030 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4031 );
4032 }; // if
4033 this_thr->th.th_pri_head = NULL;
4034 }; // if
4035
4036 /* Initialize dynamic dispatch */
4037 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004038 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004039 /*
4040 * Use team max_nproc since this will never change for the team.
4041 */
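        // A serialized team (t_max_nproc == 1) needs only one dispatch buffer; otherwise allocate
        // KMP_MAX_DISP_BUF buffers, cycled via th_disp_index, so consecutive dynamically scheduled
        // loops can be set up without waiting on one another.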
4042 size_t disp_size = sizeof( dispatch_private_info_t ) *
4043 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
4044 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4045 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004046 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004047 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4048
4049 dispatch->th_disp_index = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004050#if OMP_41_ENABLED
4051 dispatch->th_doacross_buf_idx = 0;
4052#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004053 if( ! dispatch->th_disp_buffer ) {
4054 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004055
4056 if ( __kmp_storage_map ) {
4057 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4058 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
4059 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4060 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4061 gtid, team->t.t_id, gtid );
4062 }
4063 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004064 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004065 }
4066
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004067 dispatch->th_dispatch_pr_current = 0;
4068 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004069
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004070 dispatch->th_deo_fcn = 0; /* ORDERED */
4071 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004072 }
4073
4074 this_thr->th.th_next_pool = NULL;
4075
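    // th_task_state_memo_stack saves th_task_state across nested parallel regions; start with
    // room for 4 nesting levels (the stack is grown later if nesting goes deeper).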
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004076 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004077 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004078 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4079 this_thr->th.th_task_state_top = 0;
4080 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004081 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4082 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004083 }
4084
Jim Cownie5e8470a2013-09-27 10:38:44 +00004085 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4086 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4087
4088 KMP_MB();
4089}
4090
4091
4092/* allocate a new thread for the requesting team. this is only called from within a
4093 * forkjoin critical section. we will first try to get an available thread from the
4094 * thread pool. if none is available, we will fork a new one assuming we are able
4095 * to create a new one. this should be assured, as the caller should check on this
4096 * first.
4097 */
4098kmp_info_t *
4099__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4100{
4101 kmp_team_t *serial_team;
4102 kmp_info_t *new_thr;
4103 int new_gtid;
4104
4105 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4106 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004107#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004108 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004109#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004110 KMP_MB();
4111
4112 /* first, try to get one from the thread pool */
4113 if ( __kmp_thread_pool ) {
4114
4115 new_thr = (kmp_info_t*)__kmp_thread_pool;
4116 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4117 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4118 __kmp_thread_pool_insert_pt = NULL;
4119 }
4120 TCW_4(new_thr->th.th_in_pool, FALSE);
4121 //
4122 // Don't touch th_active_in_pool or th_active.
4123 // The worker thread adjusts those flags as it sleeps/awakens.
4124 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004125 __kmp_thread_pool_nth--;
4126
4127 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4128 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004129 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004130 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4131 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4132
4133 /* setup the thread structure */
4134 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4135 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4136
4137 TCW_4(__kmp_nth, __kmp_nth + 1);
4138
Jonathan Peyton54127982015-11-04 21:37:48 +00004139 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004140 new_thr->th.th_task_state_top = 0;
4141 new_thr->th.th_task_state_stack_sz = 4;
4142
Jim Cownie5e8470a2013-09-27 10:38:44 +00004143#ifdef KMP_ADJUST_BLOCKTIME
    4144 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004145 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004146 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4147 if ( __kmp_nth > __kmp_avail_proc ) {
4148 __kmp_zero_bt = TRUE;
4149 }
4150 }
4151#endif /* KMP_ADJUST_BLOCKTIME */
4152
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004153#if KMP_DEBUG
4154 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
4155 int b;
4156 kmp_balign_t * balign = new_thr->th.th_bar;
4157 for( b = 0; b < bs_last_barrier; ++ b )
4158 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4159#endif
4160
Jim Cownie5e8470a2013-09-27 10:38:44 +00004161 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4162 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4163
4164 KMP_MB();
4165 return new_thr;
4166 }
4167
4168
    4169 /* no, we'll fork a new one */
4170 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4171 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4172
4173 //
4174 // If this is the first worker thread the RTL is creating, then also
4175 // launch the monitor thread. We try to do this as early as possible.
4176 //
4177 if ( ! TCR_4( __kmp_init_monitor ) ) {
4178 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4179 if ( ! TCR_4( __kmp_init_monitor ) ) {
4180 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4181 TCW_4( __kmp_init_monitor, 1 );
4182 __kmp_create_monitor( & __kmp_monitor );
4183 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004184 #if KMP_OS_WINDOWS
4185 // AC: wait until monitor has started. This is a fix for CQ232808.
4186 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
    4187 // work in between, then there is a high probability that the monitor thread started after
4188 // the library shutdown. At shutdown it is too late to cope with the problem, because
    4189 // when the master is in DllMain (process detach) the monitor has no chance to start
    4190 // (it is blocked), and the master has no means to inform the monitor that the library has gone,
4191 // because all the memory which the monitor can access is going to be released/reset.
4192 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4193 KMP_YIELD( TRUE );
4194 }
4195 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4196 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004197 }
4198 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4199 }
4200
4201 KMP_MB();
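    /* find the lowest unused gtid slot; slot 0 is always taken by the initial root thread */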
4202 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4203 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4204 }
4205
4206 /* allocate space for it. */
4207 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4208
4209 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4210
4211 if ( __kmp_storage_map ) {
4212 __kmp_print_thread_storage_map( new_thr, new_gtid );
4213 }
4214
4215 /* add the reserve serialized team, initialized from the team's master thread */
4216 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004217 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004218 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004219
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004220 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004221 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004222#if OMPT_SUPPORT
4223 0, // root parallel id
4224#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004225#if OMP_40_ENABLED
4226 proc_bind_default,
4227#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004228 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004229 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004230 }
4231 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004232 serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
4233 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004234 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4235 new_thr ) );
4236
4237 /* setup the thread structures */
4238 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4239
4240 #if USE_FAST_MEMORY
4241 __kmp_initialize_fast_memory( new_thr );
4242 #endif /* USE_FAST_MEMORY */
4243
4244 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004245 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004246 __kmp_initialize_bget( new_thr );
4247 #endif
4248
4249 __kmp_init_random( new_thr ); // Initialize random number generator
4250
4251 /* Initialize these only once when thread is grabbed for a team allocation */
4252 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4253 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4254
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004255 int b;
4256 kmp_balign_t * balign = new_thr->th.th_bar;
4257 for(b=0; b<bs_last_barrier; ++b) {
4258 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4259 balign[b].bb.team = NULL;
4260 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4261 balign[b].bb.use_oncore_barrier = 0;
4262 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004263
4264 new_thr->th.th_spin_here = FALSE;
4265 new_thr->th.th_next_waiting = 0;
4266
Alp Toker98758b02014-03-02 04:12:06 +00004267#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004268 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4269 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4270 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4271 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4272#endif
4273
4274 TCW_4(new_thr->th.th_in_pool, FALSE);
4275 new_thr->th.th_active_in_pool = FALSE;
4276 TCW_4(new_thr->th.th_active, TRUE);
4277
4278 /* adjust the global counters */
4279 __kmp_all_nth ++;
4280 __kmp_nth ++;
4281
4282 //
4283 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4284 // for low numbers of procs, and method #2 (keyed API call) for higher
4285 // numbers of procs.
4286 //
4287 if ( __kmp_adjust_gtid_mode ) {
4288 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4289 if ( TCR_4(__kmp_gtid_mode) != 2) {
4290 TCW_4(__kmp_gtid_mode, 2);
4291 }
4292 }
4293 else {
4294 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4295 TCW_4(__kmp_gtid_mode, 1);
4296 }
4297 }
4298 }
4299
4300#ifdef KMP_ADJUST_BLOCKTIME
4301 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004302 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004303 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4304 if ( __kmp_nth > __kmp_avail_proc ) {
4305 __kmp_zero_bt = TRUE;
4306 }
4307 }
4308#endif /* KMP_ADJUST_BLOCKTIME */
4309
4310 /* actually fork it and create the new worker thread */
4311 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4312 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4313 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4314
Jim Cownie5e8470a2013-09-27 10:38:44 +00004315 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4316 KMP_MB();
4317 return new_thr;
4318}
4319
4320/*
4321 * reinitialize team for reuse.
4322 *
    4323 * The hot team code calls this routine at every fork barrier, so the EPCC barrier
    4324 * tests are extremely sensitive to changes in it, esp. writes to the team
4325 * struct, which cause a cache invalidation in all threads.
4326 *
4327 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4328 */
4329static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004330__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004331 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4332 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004333 KMP_DEBUG_ASSERT( team && new_icvs);
4334 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004335 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004336
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004337 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004338
Jim Cownie181b4bb2013-12-23 17:28:57 +00004339 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004340 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004341 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004342
4343 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4344 team->t.t_threads[0], team ) );
4345}
4346
Jim Cownie5e8470a2013-09-27 10:38:44 +00004347
4348/* initialize the team data structure
4349 * this assumes the t_threads and t_max_nproc are already set
4350 * also, we don't touch the arguments */
4351static void
4352__kmp_initialize_team(
4353 kmp_team_t * team,
4354 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004355 kmp_internal_control_t * new_icvs,
4356 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004357) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004358 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4359
Jim Cownie5e8470a2013-09-27 10:38:44 +00004360 /* verify */
4361 KMP_DEBUG_ASSERT( team );
4362 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4363 KMP_DEBUG_ASSERT( team->t.t_threads );
4364 KMP_MB();
4365
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004366 team->t.t_master_tid = 0; /* not needed */
4367 /* team->t.t_master_bar; not needed */
4368 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4369 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004370
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004371 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4372 team->t.t_next_pool = NULL;
4373 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004374
4375 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004376 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004377
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004378 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4379 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004380
4381#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004382 team->t.t_fp_control_saved = FALSE; /* not needed */
4383 team->t.t_x87_fpu_control_word = 0; /* not needed */
4384 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004385#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4386
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004387 team->t.t_construct = 0;
4388 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004389
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004390 team->t.t_ordered .dt.t_value = 0;
4391 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004392
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004393 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004394
4395#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004396 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004397#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004398 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004399
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004400 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004401
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004402 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004403
4404 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004405 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004406}
4407
Alp Toker98758b02014-03-02 04:12:06 +00004408#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004409/* Sets full mask for thread and returns old mask, no changes to structures. */
4410static void
4411__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4412{
4413 if ( KMP_AFFINITY_CAPABLE() ) {
4414 int status;
4415 if ( old_mask != NULL ) {
4416 status = __kmp_get_system_affinity( old_mask, TRUE );
4417 int error = errno;
4418 if ( status != 0 ) {
4419 __kmp_msg(
4420 kmp_ms_fatal,
4421 KMP_MSG( ChangeThreadAffMaskError ),
4422 KMP_ERR( error ),
4423 __kmp_msg_null
4424 );
4425 }
4426 }
4427 __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
4428 }
4429}
4430#endif
4431
Alp Toker98758b02014-03-02 04:12:06 +00004432#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004433
4434//
4435// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
    4436 // It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004437// thread's partition, and binds each worker to a place in its partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004438// The master thread's partition should already include its current binding.
4439//
4440static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004441__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004442{
4443 //
    4444 // Copy the master thread's place partition to the team struct
4445 //
4446 kmp_info_t *master_th = team->t.t_threads[0];
4447 KMP_DEBUG_ASSERT( master_th != NULL );
4448 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4449 int first_place = master_th->th.th_first_place;
4450 int last_place = master_th->th.th_last_place;
4451 int masters_place = master_th->th.th_current_place;
4452 team->t.t_first_place = first_place;
4453 team->t.t_last_place = last_place;
4454
4455 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4456 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4457 masters_place, first_place, last_place ) );
4458
4459 switch ( proc_bind ) {
4460
4461 case proc_bind_default:
4462 //
4463 // serial teams might have the proc_bind policy set to
4464 // proc_bind_default. It doesn't matter, as we don't
4465 // rebind the master thread for any proc_bind policy.
4466 //
4467 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4468 break;
4469
4470 case proc_bind_master:
4471 {
4472 int f;
4473 int n_th = team->t.t_nproc;
4474 for ( f = 1; f < n_th; f++ ) {
4475 kmp_info_t *th = team->t.t_threads[f];
4476 KMP_DEBUG_ASSERT( th != NULL );
4477 th->th.th_first_place = first_place;
4478 th->th.th_last_place = last_place;
4479 th->th.th_new_place = masters_place;
4480
4481 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4482 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4483 team->t.t_id, f, masters_place, first_place, last_place ) );
4484 }
4485 }
4486 break;
4487
4488 case proc_bind_close:
4489 {
4490 int f;
4491 int n_th = team->t.t_nproc;
4492 int n_places;
4493 if ( first_place <= last_place ) {
4494 n_places = last_place - first_place + 1;
4495 }
4496 else {
4497 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4498 }
4499 if ( n_th <= n_places ) {
4500 int place = masters_place;
4501 for ( f = 1; f < n_th; f++ ) {
4502 kmp_info_t *th = team->t.t_threads[f];
4503 KMP_DEBUG_ASSERT( th != NULL );
4504
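                    // advance to the next place, wrapping both at the end of the master's
                    // partition and at the end of the global place list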
4505 if ( place == last_place ) {
4506 place = first_place;
4507 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004508 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004509 place = 0;
4510 }
4511 else {
4512 place++;
4513 }
4514 th->th.th_first_place = first_place;
4515 th->th.th_last_place = last_place;
4516 th->th.th_new_place = place;
4517
4518 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4519 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4520 team->t.t_id, f, place, first_place, last_place ) );
4521 }
4522 }
4523 else {
4524 int S, rem, gap, s_count;
4525 S = n_th / n_places;
4526 s_count = 0;
4527 rem = n_th - ( S * n_places );
4528 gap = rem > 0 ? n_places/rem : n_places;
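                // Distribute n_th threads over n_places places: S threads per place, with the
                // rem leftover threads adding one extra thread to every gap-th place.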
4529 int place = masters_place;
4530 int gap_ct = gap;
4531 for ( f = 0; f < n_th; f++ ) {
4532 kmp_info_t *th = team->t.t_threads[f];
4533 KMP_DEBUG_ASSERT( th != NULL );
4534
4535 th->th.th_first_place = first_place;
4536 th->th.th_last_place = last_place;
4537 th->th.th_new_place = place;
4538 s_count++;
4539
4540 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4541 // do nothing, add an extra thread to place on next iteration
4542 }
4543 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4544 // we added an extra thread to this place; move to next place
4545 if ( place == last_place ) {
4546 place = first_place;
4547 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004548 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004549 place = 0;
4550 }
4551 else {
4552 place++;
4553 }
4554 s_count = 0;
4555 gap_ct = 1;
4556 rem--;
4557 }
4558 else if (s_count == S) { // place full; don't add extra
4559 if ( place == last_place ) {
4560 place = first_place;
4561 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004562 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004563 place = 0;
4564 }
4565 else {
4566 place++;
4567 }
4568 gap_ct++;
4569 s_count = 0;
4570 }
4571
4572 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4573 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4574 team->t.t_id, f, th->th.th_new_place, first_place,
4575 last_place ) );
4576 }
4577 KMP_DEBUG_ASSERT( place == masters_place );
4578 }
4579 }
4580 break;
4581
4582 case proc_bind_spread:
4583 {
4584 int f;
4585 int n_th = team->t.t_nproc;
4586 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004587 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004588 if ( first_place <= last_place ) {
4589 n_places = last_place - first_place + 1;
4590 }
4591 else {
4592 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4593 }
4594 if ( n_th <= n_places ) {
4595 int place = masters_place;
4596 int S = n_places/n_th;
4597 int s_count, rem, gap, gap_ct;
4598 rem = n_places - n_th*S;
4599 gap = rem ? n_th/rem : 1;
4600 gap_ct = gap;
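                // Each thread gets a sub-partition of S consecutive places; the rem leftover
                // places are distributed by giving one extra place to every gap-th thread.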
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004601 thidx = n_th;
4602 if (update_master_only == 1)
4603 thidx = 1;
4604 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004605 kmp_info_t *th = team->t.t_threads[f];
4606 KMP_DEBUG_ASSERT( th != NULL );
4607
4608 th->th.th_first_place = place;
4609 th->th.th_new_place = place;
4610 s_count = 1;
4611 while (s_count < S) {
4612 if ( place == last_place ) {
4613 place = first_place;
4614 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004615 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004616 place = 0;
4617 }
4618 else {
4619 place++;
4620 }
4621 s_count++;
4622 }
4623 if (rem && (gap_ct == gap)) {
4624 if ( place == last_place ) {
4625 place = first_place;
4626 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004627 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004628 place = 0;
4629 }
4630 else {
4631 place++;
4632 }
4633 rem--;
4634 gap_ct = 0;
4635 }
4636 th->th.th_last_place = place;
4637 gap_ct++;
4638
4639 if ( place == last_place ) {
4640 place = first_place;
4641 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004642 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004643 place = 0;
4644 }
4645 else {
4646 place++;
4647 }
4648
4649 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4650 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4651 team->t.t_id, f, th->th.th_new_place,
4652 th->th.th_first_place, th->th.th_last_place ) );
4653 }
4654 KMP_DEBUG_ASSERT( place == masters_place );
4655 }
4656 else {
4657 int S, rem, gap, s_count;
4658 S = n_th / n_places;
4659 s_count = 0;
4660 rem = n_th - ( S * n_places );
4661 gap = rem > 0 ? n_places/rem : n_places;
4662 int place = masters_place;
4663 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004664 thidx = n_th;
4665 if (update_master_only == 1)
4666 thidx = 1;
4667 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004668 kmp_info_t *th = team->t.t_threads[f];
4669 KMP_DEBUG_ASSERT( th != NULL );
4670
4671 th->th.th_first_place = place;
4672 th->th.th_last_place = place;
4673 th->th.th_new_place = place;
4674 s_count++;
4675
4676 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4677 // do nothing, add an extra thread to place on next iteration
4678 }
4679 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4680 // we added an extra thread to this place; move on to next place
4681 if ( place == last_place ) {
4682 place = first_place;
4683 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004684 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004685 place = 0;
4686 }
4687 else {
4688 place++;
4689 }
4690 s_count = 0;
4691 gap_ct = 1;
4692 rem--;
4693 }
4694 else if (s_count == S) { // place is full; don't add extra thread
4695 if ( place == last_place ) {
4696 place = first_place;
4697 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004698 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004699 place = 0;
4700 }
4701 else {
4702 place++;
4703 }
4704 gap_ct++;
4705 s_count = 0;
4706 }
4707
4708 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4709 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4710 team->t.t_id, f, th->th.th_new_place,
4711 th->th.th_first_place, th->th.th_last_place) );
4712 }
4713 KMP_DEBUG_ASSERT( place == masters_place );
4714 }
4715 }
4716 break;
4717
4718 default:
4719 break;
4720 }
4721
4722 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4723}
4724
Alp Toker98758b02014-03-02 04:12:06 +00004725#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004726
4727/* allocate a new team data structure to use. take one off of the free pool if available */
4728kmp_team_t *
4729__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004730#if OMPT_SUPPORT
4731 ompt_parallel_id_t ompt_parallel_id,
4732#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004733#if OMP_40_ENABLED
4734 kmp_proc_bind_t new_proc_bind,
4735#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004736 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004737 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004738{
Jonathan Peyton45be4502015-08-11 21:36:41 +00004739 KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004740 int f;
4741 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004742 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004743 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004744
4745 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4746 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4747 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4748 KMP_MB();
4749
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004750#if KMP_NESTED_HOT_TEAMS
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004751 kmp_hot_team_ptr_t *hot_teams;
4752 if( master ) {
4753 team = master->th.th_team;
4754 level = team->t.t_active_level;
4755 if( master->th.th_teams_microtask ) { // in teams construct?
4756 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4757 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4758 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
4759 ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise
4760 }
4761 }
4762 hot_teams = master->th.th_hot_teams;
4763 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4764 { // hot team has already been allocated for given level
4765 use_hot_team = 1;
4766 } else {
4767 use_hot_team = 0;
4768 }
4769 }
4770#endif
4771 // Optimization to use a "hot" team
4772 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004773 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004774#if KMP_NESTED_HOT_TEAMS
4775 team = hot_teams[level].hot_team;
4776#else
4777 team = root->r.r_hot_team;
4778#endif
4779#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004780 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004781 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4782 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004783 }
4784#endif
4785
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004786 // Has the number of threads changed?
4787 /* Let's assume the most common case is that the number of threads is unchanged, and
4788 put that case first. */
4789 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4790 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004791 // This case can mean that omp_set_num_threads() was called and the hot team size
4792 // was already reduced, so we check the special flag
4793 if ( team->t.t_size_changed == -1 ) {
4794 team->t.t_size_changed = 1;
4795 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004796 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004797 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004798
4799 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004800 kmp_r_sched_t new_sched = new_icvs->sched;
    4801 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
4802 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004803
4804 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4805
4806 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4807 0, team->t.t_threads[0], team ) );
4808 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4809
4810#if OMP_40_ENABLED
4811# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004812 if ( ( team->t.t_size_changed == 0 )
4813 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004814 if (new_proc_bind == proc_bind_spread) {
4815 __kmp_partition_places(team, 1); // add flag to update only master for spread
4816 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004817 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4818 team->t.t_id, new_proc_bind, team->t.t_first_place,
4819 team->t.t_last_place ) );
4820 }
4821 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004822 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004823 __kmp_partition_places( team );
4824 }
4825# else
4826 if ( team->t.t_proc_bind != new_proc_bind ) {
4827 team->t.t_proc_bind = new_proc_bind;
4828 }
4829# endif /* KMP_AFFINITY_SUPPORTED */
4830#endif /* OMP_40_ENABLED */
4831 }
4832 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004833 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4834
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004835 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004836#if KMP_NESTED_HOT_TEAMS
4837 if( __kmp_hot_teams_mode == 0 ) {
    4838 // AC: the saved number of threads should correspond to the team's value in this mode;
    4839 // it can be bigger in mode 1, when the hot team has some threads in reserve
4840 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4841 hot_teams[level].hot_team_nth = new_nproc;
4842#endif // KMP_NESTED_HOT_TEAMS
4843 /* release the extra threads we don't need any more */
4844 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4845 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004846 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4847 // When decreasing team size, threads no longer in the team should unref task team.
4848 team->t.t_threads[f]->th.th_task_team = NULL;
4849 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004850 __kmp_free_thread( team->t.t_threads[ f ] );
4851 team->t.t_threads[ f ] = NULL;
4852 }
4853#if KMP_NESTED_HOT_TEAMS
4854 } // (__kmp_hot_teams_mode == 0)
4855#endif // KMP_NESTED_HOT_TEAMS
4856 team->t.t_nproc = new_nproc;
4857 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4858 team->t.t_sched = new_icvs->sched;
4859 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004860
Jim Cownie5e8470a2013-09-27 10:38:44 +00004861 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004862 for(f = 0; f < new_nproc; ++f) {
4863 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004864 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004865 // restore the current task state of the master thread: should be the implicit task
4866 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4867 0, team->t.t_threads[0], team ) );
4868
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004869 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004870
4871#ifdef KMP_DEBUG
4872 for ( f = 0; f < team->t.t_nproc; f++ ) {
4873 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4874 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4875 }
4876#endif
4877
4878#if OMP_40_ENABLED
4879 team->t.t_proc_bind = new_proc_bind;
Alp Toker98758b02014-03-02 04:12:06 +00004880# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004881 __kmp_partition_places( team );
4882# endif
4883#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004884 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004885 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004886#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004887 kmp_affin_mask_t *old_mask;
4888 if ( KMP_AFFINITY_CAPABLE() ) {
4889 KMP_CPU_ALLOC(old_mask);
4890 }
4891#endif
4892
4893 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4894
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004895 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004896
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004897#if KMP_NESTED_HOT_TEAMS
4898 int avail_threads = hot_teams[level].hot_team_nth;
4899 if( new_nproc < avail_threads )
4900 avail_threads = new_nproc;
4901 kmp_info_t **other_threads = team->t.t_threads;
4902 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4903 // Adjust barrier data of reserved threads (if any) of the team
4904 // Other data will be set in __kmp_initialize_info() below.
4905 int b;
4906 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4907 for ( b = 0; b < bs_last_barrier; ++ b ) {
4908 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4909 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004910#if USE_DEBUGGER
4911 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4912#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004913 }
4914 }
4915 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4916 // we have all needed threads in reserve, no need to allocate any
    4917 // this is only possible in mode 1; there cannot be reserved threads in mode 0
4918 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4919 team->t.t_nproc = new_nproc; // just get reserved threads involved
4920 } else {
4921 // we may have some threads in reserve, but not enough
4922 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4923 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4924#endif // KMP_NESTED_HOT_TEAMS
4925 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004926 /* reallocate larger arrays */
4927 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004928 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004929 }
4930
Alp Toker98758b02014-03-02 04:12:06 +00004931#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004932 /* Temporarily set full mask for master thread before
4933 creation of workers. The reason is that workers inherit
4934 the affinity from master, so if a lot of workers are
    4935 created on a single core quickly, they don't get
4936 a chance to set their own affinity for a long time.
4937 */
4938 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4939#endif
4940
4941 /* allocate new threads for the hot team */
4942 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4943 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4944 KMP_DEBUG_ASSERT( new_worker );
4945 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004946
Jonathan Peytond26e2132015-09-10 18:44:30 +00004947 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004948 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4949 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4950 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4951
4952 { // Initialize barrier data for new threads.
4953 int b;
4954 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004955 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004956 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004957 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004958#if USE_DEBUGGER
4959 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4960#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004961 }
4962 }
4963 }
4964
Alp Toker98758b02014-03-02 04:12:06 +00004965#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004966 if ( KMP_AFFINITY_CAPABLE() ) {
4967 /* Restore initial master thread's affinity mask */
4968 __kmp_set_system_affinity( old_mask, TRUE );
4969 KMP_CPU_FREE(old_mask);
4970 }
4971#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004972#if KMP_NESTED_HOT_TEAMS
4973 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
4974#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004975 /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00004976 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004977 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004978
Jonathan Peytone03b62f2015-10-08 18:49:40 +00004979 /* reinitialize the threads */
4980 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00004981 for (f=0; f < team->t.t_nproc; ++f)
4982 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
4983 if (level) { // set th_task_state for new threads in nested hot team
4984 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00004985 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00004986 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
4987 for (f=old_nproc; f < team->t.t_nproc; ++f)
4988 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004989 }
Jonathan Peyton54127982015-11-04 21:37:48 +00004990 else { // set th_task_state for new threads in non-nested hot team
4991 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
4992 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004993 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004994 }
4995
Jim Cownie5e8470a2013-09-27 10:38:44 +00004996#ifdef KMP_DEBUG
4997 for ( f = 0; f < team->t.t_nproc; ++ f ) {
4998 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4999 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5000 }
5001#endif
5002
5003#if OMP_40_ENABLED
5004 team->t.t_proc_bind = new_proc_bind;
Alp Toker98758b02014-03-02 04:12:06 +00005005# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005006 __kmp_partition_places( team );
5007# endif
5008#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005009 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005010
5011#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005012 kmp_info_t *master = team->t.t_threads[0];
5013 if( master->th.th_teams_microtask ) {
5014 for( f = 1; f < new_nproc; ++f ) {
5015 // propagate teams construct specific info to workers
5016 kmp_info_t *thr = team->t.t_threads[f];
5017 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5018 thr->th.th_teams_level = master->th.th_teams_level;
5019 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005020 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005021 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005022#endif /* OMP_40_ENABLED */
5023#if KMP_NESTED_HOT_TEAMS
5024 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005025 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005026 for( f = 1; f < new_nproc; ++f ) {
5027 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005028 int b;
5029 kmp_balign_t * balign = thr->th.th_bar;
5030 for( b = 0; b < bs_last_barrier; ++ b ) {
5031 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5032 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005033#if USE_DEBUGGER
5034 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5035#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005036 }
5037 }
5038 }
5039#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005040
5041 /* reallocate space for arguments if necessary */
5042 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005043 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005044 //
5045 // The hot team re-uses the previous task team,
5046 // if untouched during the previous release->gather phase.
5047 //
5048
5049 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5050
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005051#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005052 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005053 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5054 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005055 }
5056#endif
5057
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005058#if OMPT_SUPPORT
5059 __ompt_team_assign_id(team, ompt_parallel_id);
5060#endif
5061
Jim Cownie5e8470a2013-09-27 10:38:44 +00005062 KMP_MB();
5063
5064 return team;
5065 }
5066
5067 /* next, let's try to take one from the team pool */
5068 KMP_MB();
5069 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5070 {
5071 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5072 if ( team->t.t_max_nproc >= max_nproc ) {
5073 /* take this team from the team pool */
5074 __kmp_team_pool = team->t.t_next_pool;
5075
5076 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005077 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005078
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005079 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5080 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5081 team->t.t_task_team[0] = NULL;
5082 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005083
5084 /* reallocate space for arguments if necessary */
5085 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005086 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005087
5088 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5089 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5090 { // Initialize barrier data.
5091 int b;
5092 for ( b = 0; b < bs_last_barrier; ++ b) {
5093 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005094#if USE_DEBUGGER
5095 team->t.t_bar[ b ].b_master_arrived = 0;
5096 team->t.t_bar[ b ].b_team_arrived = 0;
5097#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005098 }
5099 }
5100
5101#if OMP_40_ENABLED
5102 team->t.t_proc_bind = new_proc_bind;
5103#endif
5104
5105 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005106
5107#if OMPT_SUPPORT
5108 __ompt_team_assign_id(team, ompt_parallel_id);
5109#endif
5110
Jim Cownie5e8470a2013-09-27 10:38:44 +00005111 KMP_MB();
5112
5113 return team;
5114 }
5115
5116 /* reap team if it is too small, then loop back and check the next one */
5117 /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
5118 /* TODO: Use technique to find the right size hot-team, don't reap them */
5119 team = __kmp_reap_team( team );
5120 __kmp_team_pool = team;
5121 }
5122
5123 /* nothing available in the pool, no matter, make a new team! */
5124 KMP_MB();
5125 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5126
5127 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005128 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005129 /* NOTE well, for some reason allocating one big buffer and dividing it
5130 * up seems to really hurt performance a lot on the P4, so let's not use
5131 * this... */
5132 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005133
5134 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005135 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005136
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005137 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5138 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5139 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5140 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005141
5142 if ( __kmp_storage_map ) {
5143 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5144 }
5145
5146 /* allocate space for arguments */
5147 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005148 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005149
5150 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5151 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5152 { // Initialize barrier data.
5153 int b;
5154 for ( b = 0; b < bs_last_barrier; ++ b ) {
5155 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005156#if USE_DEBUGGER
5157 team->t.t_bar[ b ].b_master_arrived = 0;
5158 team->t.t_bar[ b ].b_team_arrived = 0;
5159#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005160 }
5161 }
5162
5163#if OMP_40_ENABLED
5164 team->t.t_proc_bind = new_proc_bind;
5165#endif
5166
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005167#if OMPT_SUPPORT
5168 __ompt_team_assign_id(team, ompt_parallel_id);
5169 team->t.ompt_serialized_team_info = NULL;
5170#endif
5171
Jim Cownie5e8470a2013-09-27 10:38:44 +00005172 KMP_MB();
5173
5174 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5175
5176 return team;
5177}
5178
5179/* TODO implement hot-teams at all levels */
5180/* TODO implement lazy thread release on demand (disband request) */
5181
5182/* free the team. return it to the team pool. release all the threads
5183 * associated with it */
5184void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005185__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005186{
5187 int f;
5188 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5189
5190 /* verify state */
5191 KMP_DEBUG_ASSERT( root );
5192 KMP_DEBUG_ASSERT( team );
5193 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5194 KMP_DEBUG_ASSERT( team->t.t_threads );
5195
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005196 int use_hot_team = team == root->r.r_hot_team;
5197#if KMP_NESTED_HOT_TEAMS
5198 int level;
5199 kmp_hot_team_ptr_t *hot_teams;
5200 if( master ) {
5201 level = team->t.t_active_level - 1;
5202 if( master->th.th_teams_microtask ) { // in teams construct?
5203 if( master->th.th_teams_size.nteams > 1 ) {
5204 ++level; // level was not increased in teams construct for team_of_masters
5205 }
5206 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5207 master->th.th_teams_level == team->t.t_level ) {
5208 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5209 } // team->t.t_level will be increased inside parallel
5210 }
5211 hot_teams = master->th.th_hot_teams;
5212 if( level < __kmp_hot_teams_max_level ) {
5213 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5214 use_hot_team = 1;
5215 }
5216 }
5217#endif // KMP_NESTED_HOT_TEAMS
5218
Jim Cownie5e8470a2013-09-27 10:38:44 +00005219 /* team is done working */
5220 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005221 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005222 // Do not reset pointer to parent team to NULL for hot teams.
5223
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005224 /* if this is not a hot team, release our threads */
5225 if( ! use_hot_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005226 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005227 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005228 int tt_idx;
5229 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005230 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5231 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005232 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5233 team->t.t_threads[f]->th.th_task_team = NULL;
5234 }
5235 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005236#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005237 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005238#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005239 team->t.t_task_team[tt_idx] = NULL;
5240 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005241 }
5242 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005243
5244 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005245 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005246 team->t.t_level = 0;
5247 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005248
Jim Cownie5e8470a2013-09-27 10:38:44 +00005249 /* free the worker threads */
5250 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5251 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5252 __kmp_free_thread( team->t.t_threads[ f ] );
5253 team->t.t_threads[ f ] = NULL;
5254 }
5255
Jim Cownie5e8470a2013-09-27 10:38:44 +00005256 /* put the team back in the team pool */
5257 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005258 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005259 __kmp_team_pool = (volatile kmp_team_t*) team;
5260 }
5261
5262 KMP_MB();
5263}
5264
5265
5266/* reap the team. destroy it, reclaim all its resources and free its memory */
5267kmp_team_t *
5268__kmp_reap_team( kmp_team_t *team )
5269{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005270 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005271
5272 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005273 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5274 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5275 KMP_DEBUG_ASSERT( team->t.t_threads );
5276 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005277
5278 /* TODO clean the threads that are a part of this? */
5279
5280 /* free stuff */
5281
5282 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005283 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5284 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005285 __kmp_free( team );
5286
5287 KMP_MB();
5288 return next_pool;
5289}
5290
5291//
5292// Free the thread. Don't reap it, just place it on the pool of available
5293// threads.
5294//
5295// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5296// binding for the affinity mechanism to be useful.
5297//
5298// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5299// However, we want to avoid a potential performance problem by always
5300// scanning through the list to find the correct point at which to insert
5301// the thread (potential N**2 behavior). To do this we keep track of the
5302// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5303// With single-level parallelism, threads will always be added to the tail
5304// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5305// parallelism, all bets are off and we may need to scan through the entire
5306// free list.
5307//
5308// This change also has a potentially large performance benefit, for some
5309// applications. Previously, as threads were freed from the hot team, they
5310// would be placed back on the free list in inverse order. If the hot team
5311// grew back to it's original size, then the freed thread would be placed
5312// back on the hot team in reverse order. This could cause bad cache
5313// locality problems on programs where the size of the hot team regularly
5314// grew and shrunk.
5315//
5316// Now, for single-level parallelism, the OMP tid is always == gtid.
5317//
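//
// A minimal, self-contained sketch (kept out of the build with #if 0) of the
// hint-guided sorted insert described above. The node_t/pool/pool_insert names
// are hypothetical stand-ins for kmp_info_t, __kmp_thread_pool and
// __kmp_thread_pool_insert_pt; the real logic lives in __kmp_free_thread() below.
//
#if 0
typedef struct node {
    int          gtid;
    struct node *next;
} node_t;

static node_t *pool           = NULL;   /* list kept sorted by ascending gtid */
static node_t *pool_insert_pt = NULL;   /* last node inserted (the hint)      */

static void pool_insert( node_t *n ) {
    node_t **scan;
    /* If the hint already points past the new key, fall back to a full scan. */
    if ( pool_insert_pt != NULL && pool_insert_pt->gtid > n->gtid )
        pool_insert_pt = NULL;
    /* Start either at the hint or at the head of the list. */
    scan = ( pool_insert_pt != NULL ) ? &pool_insert_pt->next : &pool;
    while ( *scan != NULL && (*scan)->gtid < n->gtid )
        scan = &(*scan)->next;
    /* Splice in and remember the insertion point for next time; with
       single-level parallelism this makes each insert O(1). */
    n->next = *scan;
    pool_insert_pt = *scan = n;
}
#endif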
5318void
5319__kmp_free_thread( kmp_info_t *this_th )
5320{
5321 int gtid;
5322 kmp_info_t **scan;
5323
5324 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5325 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5326
5327 KMP_DEBUG_ASSERT( this_th );
5328
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005329 // When moving a thread to the pool, switch it to wait on its own b_go flag, and clear its team pointer (NULL team).
5330 int b;
5331 kmp_balign_t *balign = this_th->th.th_bar;
5332 for (b=0; b<bs_last_barrier; ++b) {
5333 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5334 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5335 balign[b].bb.team = NULL;
5336 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005337 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005338
Jim Cownie5e8470a2013-09-27 10:38:44 +00005339 /* put thread back on the free pool */
5340 TCW_PTR(this_th->th.th_team, NULL);
5341 TCW_PTR(this_th->th.th_root, NULL);
5342 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5343
5344 //
5345 // If the __kmp_thread_pool_insert_pt is already past the new insert
5346 // point, then we need to re-scan the entire list.
5347 //
5348 gtid = this_th->th.th_info.ds.ds_gtid;
5349 if ( __kmp_thread_pool_insert_pt != NULL ) {
5350 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5351 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5352 __kmp_thread_pool_insert_pt = NULL;
5353 }
5354 }
5355
5356 //
5357 // Scan down the list to find the place to insert the thread.
5358 // scan is the address of a link in the list, possibly the address of
5359 // __kmp_thread_pool itself.
5360 //
5361 // In the absence of nested parallelism, the for loop will have 0 iterations.
5362 //
5363 if ( __kmp_thread_pool_insert_pt != NULL ) {
5364 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5365 }
5366 else {
5367 scan = (kmp_info_t **)&__kmp_thread_pool;
5368 }
5369 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5370 scan = &( (*scan)->th.th_next_pool ) );
5371
5372 //
5373 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5374 // to its address.
5375 //
5376 TCW_PTR(this_th->th.th_next_pool, *scan);
5377 __kmp_thread_pool_insert_pt = *scan = this_th;
5378 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5379 || ( this_th->th.th_info.ds.ds_gtid
5380 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5381 TCW_4(this_th->th.th_in_pool, TRUE);
5382 __kmp_thread_pool_nth++;
5383
5384 TCW_4(__kmp_nth, __kmp_nth - 1);
5385
5386#ifdef KMP_ADJUST_BLOCKTIME
5387 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005388 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005389 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5390 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5391 if ( __kmp_nth <= __kmp_avail_proc ) {
5392 __kmp_zero_bt = FALSE;
5393 }
5394 }
5395#endif /* KMP_ADJUST_BLOCKTIME */
5396
5397 KMP_MB();
5398}
5399
Jim Cownie5e8470a2013-09-27 10:38:44 +00005400
Jim Cownie5e8470a2013-09-27 10:38:44 +00005401/* ------------------------------------------------------------------------ */
5402
5403void *
5404__kmp_launch_thread( kmp_info_t *this_thr )
5405{
5406 int gtid = this_thr->th.th_info.ds.ds_gtid;
5407/* void *stack_data;*/
5408 kmp_team_t *(*volatile pteam);
5409
5410 KMP_MB();
5411 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5412
5413 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005414 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005415 }
5416
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005417#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005418 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005419 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5420 this_thr->th.ompt_thread_info.wait_id = 0;
5421 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005422 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005423 __ompt_thread_begin(ompt_thread_worker, gtid);
5424 }
5425 }
5426#endif
5427
Jim Cownie5e8470a2013-09-27 10:38:44 +00005428 /* This is the place where threads wait for work */
5429 while( ! TCR_4(__kmp_global.g.g_done) ) {
5430 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5431 KMP_MB();
5432
5433 /* wait for work to do */
5434 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5435
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005436#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005437 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005438 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5439 }
5440#endif
5441
Jim Cownie5e8470a2013-09-27 10:38:44 +00005442 /* No tid yet since not part of a team */
5443 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5444
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005445#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005446 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005447 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5448 }
5449#endif
5450
Jim Cownie5e8470a2013-09-27 10:38:44 +00005451 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5452
5453 /* have we been allocated? */
5454 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005455#if OMPT_SUPPORT
5456 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005457 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005458 if (ompt_enabled) {
5459 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005460 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005461 }
5462#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005463 /* we were just woken up, so run our new task */
5464 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5465 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005466 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5467 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005468
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005469 updateHWFPControl (*pteam);
5470
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005471#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005472 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005473 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005474 // Initialize OMPT task id for implicit task.
5475 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005476 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005477 }
5478#endif
5479
Jonathan Peyton45be4502015-08-11 21:36:41 +00005480 KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005481 {
Jonathan Peyton45be4502015-08-11 21:36:41 +00005482 KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005483 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5484 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005485 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005486 }
Jonathan Peyton45be4502015-08-11 21:36:41 +00005487 KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005488 KMP_ASSERT( rc );
5489
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005490#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005491 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005492 /* no frame set while outside task */
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005493 task_info->frame.exit_runtime_frame = 0;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005494
5495 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5496 }
5497#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005498 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005499 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5500 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005501 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005502 /* join barrier after parallel region */
5503 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005504#if OMPT_SUPPORT && OMPT_TRACE
5505 if (ompt_enabled) {
5506 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005507 // don't access *pteam here: it may have already been freed
5508 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005509 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5510 my_parallel_id, task_info->task_id);
5511 }
5512 task_info->frame.exit_runtime_frame = 0;
5513 task_info->task_id = 0;
5514 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005515#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005516 }
5517 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005518 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005519
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005520#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005521 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005522 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5523 __ompt_thread_end(ompt_thread_worker, gtid);
5524 }
5525#endif
5526
Jonathan Peyton54127982015-11-04 21:37:48 +00005527 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005528 /* run the destructors for the threadprivate data for this thread */
5529 __kmp_common_destroy_gtid( gtid );
5530
5531 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5532 KMP_MB();
5533 return this_thr;
5534}
5535
5536/* ------------------------------------------------------------------------ */
5537/* ------------------------------------------------------------------------ */
5538
Jim Cownie5e8470a2013-09-27 10:38:44 +00005539void
5540__kmp_internal_end_dest( void *specific_gtid )
5541{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005542 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005543 #pragma warning( push )
5544 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5545 #endif
5546 // Make sure no significant bits are lost
5547 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005548 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005549 #pragma warning( pop )
5550 #endif
5551
5552 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5553 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage;
5554 * this is because 0 is reserved for the nothing-stored case */
5555
5556 /* josh: One reason for setting the gtid specific data even when it is being
5557 destroyed by pthread is to allow gtid lookup through thread specific data
5558 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5559 that gets executed in the call to __kmp_internal_end_thread, actually
5560 gets the gtid through the thread specific data. Setting it here seems
5561 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5562 to run smoothly.
5563 todo: get rid of this after we remove the dependence on
5564 __kmp_gtid_get_specific
5565 */
5566 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5567 __kmp_gtid_set_specific( gtid );
5568 #ifdef KMP_TDATA_GTID
5569 __kmp_gtid = gtid;
5570 #endif
5571 __kmp_internal_end_thread( gtid );
5572}
5573
Jonathan Peyton99016992015-05-26 17:32:53 +00005574#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005575
5576// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005577// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005578// option in makefile.mk works fine.
5579
5580__attribute__(( destructor ))
5581void
5582__kmp_internal_end_dtor( void )
5583{
5584 __kmp_internal_end_atexit();
5585}
5586
5587void
5588__kmp_internal_end_fini( void )
5589{
5590 __kmp_internal_end_atexit();
5591}
5592
5593#endif
5594
5595/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5596void
5597__kmp_internal_end_atexit( void )
5598{
5599 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5600 /* [Windows]
5601 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5602 stat code that depends on thread specific data for gtid fails because that data becomes
5603 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5604 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5605 stat code and use __kmp_internal_end_library to cleanly shutdown the library.
5606
5607// TODO: Can some of this comment about GVS be removed?
5608 I suspect that the offending stat code is executed when the calling thread tries to
5609 clean up a dead root thread's data structures, resulting in GVS code trying to close
5610 the GVS structures for that thread, but since the stat code uses
5611 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5612 cleaning up itself instead of another thread, it gets confused. This happens because
5613 allowing a thread to unregister and clean up another thread is a recent modification for
5614 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5615 thread may end up trying to unregister another thread only if thread death does not
5616 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5617 specific data destructor function to detect thread death. For Windows dynamic, there
5618 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5619 workaround is applicable only for Windows static stat library.
5620 */
5621 __kmp_internal_end_library( -1 );
5622 #if KMP_OS_WINDOWS
5623 __kmp_close_console();
5624 #endif
5625}
5626
5627static void
5628__kmp_reap_thread(
5629 kmp_info_t * thread,
5630 int is_root
5631) {
5632
Alp Toker8f2d3f02014-02-24 10:40:15 +00005633 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005634
5635 int gtid;
5636
5637 KMP_DEBUG_ASSERT( thread != NULL );
5638
5639 gtid = thread->th.th_info.ds.ds_gtid;
5640
5641 if ( ! is_root ) {
5642
5643 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5644 /* Assume the threads are at the fork barrier here */
5645 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5646 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005647 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5648 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005649 }; // if
5650
Jim Cownie5e8470a2013-09-27 10:38:44 +00005651 // Terminate OS thread.
5652 __kmp_reap_worker( thread );
5653
5654 //
5655 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005656 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005657 //
5658 // There is a small timing hole here - if the worker thread was
5659 // just waking up after sleeping in the pool, had reset its
5660 // th_active_in_pool flag but not decremented the global counter
5661 // __kmp_thread_pool_active_nth yet, then the global counter
5662 // might not get updated.
5663 //
5664 // Currently, this can only happen as the library is unloaded,
5665 // so there are no harmful side effects.
5666 //
5667 if ( thread->th.th_active_in_pool ) {
5668 thread->th.th_active_in_pool = FALSE;
5669 KMP_TEST_THEN_DEC32(
5670 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5671 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5672 }
5673
5674 // Decrement # of [worker] threads in the pool.
5675 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5676 --__kmp_thread_pool_nth;
5677 }; // if
5678
5679 // Free the fast memory for tasking
5680 #if USE_FAST_MEMORY
5681 __kmp_free_fast_memory( thread );
5682 #endif /* USE_FAST_MEMORY */
5683
5684 __kmp_suspend_uninitialize_thread( thread );
5685
5686 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5687 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5688
5689 -- __kmp_all_nth;
5690 // __kmp_nth was decremented when thread is added to the pool.
5691
5692#ifdef KMP_ADJUST_BLOCKTIME
5693 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005694 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005695 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5696 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5697 if ( __kmp_nth <= __kmp_avail_proc ) {
5698 __kmp_zero_bt = FALSE;
5699 }
5700 }
5701#endif /* KMP_ADJUST_BLOCKTIME */
5702
5703 /* free the memory being used */
5704 if( __kmp_env_consistency_check ) {
5705 if ( thread->th.th_cons ) {
5706 __kmp_free_cons_stack( thread->th.th_cons );
5707 thread->th.th_cons = NULL;
5708 }; // if
5709 }
5710
5711 if ( thread->th.th_pri_common != NULL ) {
5712 __kmp_free( thread->th.th_pri_common );
5713 thread->th.th_pri_common = NULL;
5714 }; // if
5715
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005716 if (thread->th.th_task_state_memo_stack != NULL) {
5717 __kmp_free(thread->th.th_task_state_memo_stack);
5718 thread->th.th_task_state_memo_stack = NULL;
5719 }
5720
Jim Cownie5e8470a2013-09-27 10:38:44 +00005721 #if KMP_USE_BGET
5722 if ( thread->th.th_local.bget_data != NULL ) {
5723 __kmp_finalize_bget( thread );
5724 }; // if
5725 #endif
5726
Alp Toker98758b02014-03-02 04:12:06 +00005727#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005728 if ( thread->th.th_affin_mask != NULL ) {
5729 KMP_CPU_FREE( thread->th.th_affin_mask );
5730 thread->th.th_affin_mask = NULL;
5731 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005732#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005733
5734 __kmp_reap_team( thread->th.th_serial_team );
5735 thread->th.th_serial_team = NULL;
5736 __kmp_free( thread );
5737
5738 KMP_MB();
5739
5740} // __kmp_reap_thread
5741
5742static void
5743__kmp_internal_end(void)
5744{
5745 int i;
5746
5747 /* First, unregister the library */
5748 __kmp_unregister_library();
5749
5750 #if KMP_OS_WINDOWS
5751 /* In Win static library, we can't tell when a root actually dies, so we
5752 reclaim the data structures for any root threads that have died but not
5753 unregistered themselves, in order to shut down cleanly.
5754 In Win dynamic library we also can't tell when a thread dies.
5755 */
5756 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5757 #endif
5758
5759 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5760 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005761 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005762 break;
5763 KMP_MB(); /* Flush all pending memory write invalidates. */
5764 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5765
5766 if ( i < __kmp_threads_capacity ) {
5767 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5768 KMP_MB(); /* Flush all pending memory write invalidates. */
5769
5770 //
5771 // Need to check that monitor was initialized before reaping it.
5772 // If we are called from __kmp_atfork_child (which sets
5773 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5774 // contain valid data, but it is only valid in the parent process,
5775 // not the child.
5776 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005777 // New behavior (201008): instead of keying off of the flag
5778 // __kmp_init_parallel, the monitor thread creation is keyed off
5779 // of the new flag __kmp_init_monitor.
5780 //
5781 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5782 if ( TCR_4( __kmp_init_monitor ) ) {
5783 __kmp_reap_monitor( & __kmp_monitor );
5784 TCW_4( __kmp_init_monitor, 0 );
5785 }
5786 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5787 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
5788 } else {
5789 /* TODO move this to cleanup code */
5790 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005791 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005792 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5793 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005794// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005795 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005796 }
5797 }
5798 #endif
5799
5800 KMP_MB();
5801
5802 // Reap the worker threads.
5803 // This is valid for now, but be careful if threads are reaped sooner.
5804 while ( __kmp_thread_pool != NULL ) { // Loop thru all the threads in the pool.
5805 // Get the next thread from the pool.
5806 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5807 __kmp_thread_pool = thread->th.th_next_pool;
5808 // Reap it.
5809 thread->th.th_next_pool = NULL;
5810 thread->th.th_in_pool = FALSE;
5811 __kmp_reap_thread( thread, 0 );
5812 }; // while
5813 __kmp_thread_pool_insert_pt = NULL;
5814
5815 // Reap teams.
5816 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
5817 // Get the next team from the pool.
5818 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5819 __kmp_team_pool = team->t.t_next_pool;
5820 // Reap it.
5821 team->t.t_next_pool = NULL;
5822 __kmp_reap_team( team );
5823 }; // while
5824
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005825 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005826
5827 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5828 // TBD: Add some checking...
5829 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5830 }
5831
5832 /* Make sure all threadprivate destructors get run by joining with all worker
5833 threads before resetting this flag */
5834 TCW_SYNC_4(__kmp_init_common, FALSE);
5835
5836 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5837 KMP_MB();
5838
5839 //
5840 // See note above: One of the possible fixes for CQ138434 / CQ140126
5841 //
5842 // FIXME: push both code fragments down and CSE them?
5843 // push them into __kmp_cleanup() ?
5844 //
5845 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5846 if ( TCR_4( __kmp_init_monitor ) ) {
5847 __kmp_reap_monitor( & __kmp_monitor );
5848 TCW_4( __kmp_init_monitor, 0 );
5849 }
5850 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5851 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
5852
5853 } /* else !__kmp_global.t_active */
5854 TCW_4(__kmp_init_gtid, FALSE);
5855 KMP_MB(); /* Flush all pending memory write invalidates. */
5856
Jim Cownie5e8470a2013-09-27 10:38:44 +00005857 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005858#if OMPT_SUPPORT
5859 ompt_fini();
5860#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005861}
5862
5863void
5864__kmp_internal_end_library( int gtid_req )
5865{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005866 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5867 /* this shouldn't be a race condition because __kmp_internal_end() is the
5868 * only place to clear __kmp_serial_init */
5869 /* we'll check this later too, after we get the lock */
5870 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5871 // because the next check will work in any case.
5872 if( __kmp_global.g.g_abort ) {
5873 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5874 /* TODO abort? */
5875 return;
5876 }
5877 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5878 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5879 return;
5880 }
5881
5882
5883 KMP_MB(); /* Flush all pending memory write invalidates. */
5884
5885 /* find out who we are and what we should do */
5886 {
5887 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5888 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5889 if( gtid == KMP_GTID_SHUTDOWN ) {
5890 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5891 return;
5892 } else if( gtid == KMP_GTID_MONITOR ) {
5893 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5894 return;
5895 } else if( gtid == KMP_GTID_DNE ) {
5896 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5897 /* we don't know who we are, but we may still shutdown the library */
5898 } else if( KMP_UBER_GTID( gtid )) {
5899 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005900 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005901 __kmp_global.g.g_abort = -1;
5902 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5903 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5904 return;
5905 } else {
5906 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5907 __kmp_unregister_root_current_thread( gtid );
5908 }
5909 } else {
5910 /* worker threads may call this function through the atexit handler, if they call exit() */
5911 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5912 TODO: do a thorough shutdown instead
5913 */
5914 #ifdef DUMP_DEBUG_ON_EXIT
5915 if ( __kmp_debug_buf )
5916 __kmp_dump_debug_buffer( );
5917 #endif
5918 return;
5919 }
5920 }
5921 /* synchronize the termination process */
5922 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5923
5924 /* have we already finished */
5925 if( __kmp_global.g.g_abort ) {
5926 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5927 /* TODO abort? */
5928 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5929 return;
5930 }
5931 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5932 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5933 return;
5934 }
5935
5936 /* We need this lock to enforce mutex between this reading of
5937 __kmp_threads_capacity and the writing by __kmp_register_root.
5938 Alternatively, we can use a counter of roots that is
5939 atomically updated by __kmp_get_global_thread_id_reg,
5940 __kmp_do_serial_initialize and __kmp_internal_end_*.
5941 */
5942 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5943
5944 /* now we can safely conduct the actual termination */
5945 __kmp_internal_end();
5946
5947 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5948 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5949
5950 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
5951
5952 #ifdef DUMP_DEBUG_ON_EXIT
5953 if ( __kmp_debug_buf )
5954 __kmp_dump_debug_buffer();
5955 #endif
5956
5957 #if KMP_OS_WINDOWS
5958 __kmp_close_console();
5959 #endif
5960
5961 __kmp_fini_allocator();
5962
5963} // __kmp_internal_end_library
5964
5965void
5966__kmp_internal_end_thread( int gtid_req )
5967{
5968 int i;
5969
5970 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5971 /* this shouldn't be a race condition because __kmp_internal_end() is the
5972 * only place to clear __kmp_serial_init */
5973 /* we'll check this later too, after we get the lock */
5974 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5975 // because the next check will work in any case.
5976 if( __kmp_global.g.g_abort ) {
5977 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
5978 /* TODO abort? */
5979 return;
5980 }
5981 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5982 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
5983 return;
5984 }
5985
5986 KMP_MB(); /* Flush all pending memory write invalidates. */
5987
5988 /* find out who we are and what we should do */
5989 {
5990 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5991 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
5992 if( gtid == KMP_GTID_SHUTDOWN ) {
5993 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
5994 return;
5995 } else if( gtid == KMP_GTID_MONITOR ) {
5996 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
5997 return;
5998 } else if( gtid == KMP_GTID_DNE ) {
5999 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6000 return;
6001 /* we don't know who we are */
6002 } else if( KMP_UBER_GTID( gtid )) {
6003 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006004 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006005 __kmp_global.g.g_abort = -1;
6006 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6007 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6008 return;
6009 } else {
6010 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6011 __kmp_unregister_root_current_thread( gtid );
6012 }
6013 } else {
6014 /* just a worker thread, let's leave */
6015 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6016
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006017 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006018 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006019 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006020
6021 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6022 return;
6023 }
6024 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006025 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006026 // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber thread,
 6027 // because it is better to shut down later, in the library destructor.
 6028 // The reason for this change is a performance problem when a non-OpenMP thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006029 // in a loop forks and joins many openmp threads. We can save a lot of time
6030 // keeping worker threads alive until the program shutdown.
6031 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6032 // Windows (DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006033 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006034 return;
6035 #endif
6036 /* synchronize the termination process */
6037 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6038
6039 /* have we already finished */
6040 if( __kmp_global.g.g_abort ) {
6041 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6042 /* TODO abort? */
6043 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6044 return;
6045 }
6046 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6047 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6048 return;
6049 }
6050
6051 /* We need this lock to enforce mutex between this reading of
6052 __kmp_threads_capacity and the writing by __kmp_register_root.
6053 Alternatively, we can use a counter of roots that is
6054 atomically updated by __kmp_get_global_thread_id_reg,
6055 __kmp_do_serial_initialize and __kmp_internal_end_*.
6056 */
6057
6058 /* should we finish the run-time? are all siblings done? */
6059 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6060
6061 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6062 if ( KMP_UBER_GTID( i ) ) {
6063 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6064 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6065 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6066 return;
6067 };
6068 }
6069
6070 /* now we can safely conduct the actual termination */
6071
6072 __kmp_internal_end();
6073
6074 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6075 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6076
Jim Cownie77c2a632014-09-03 11:34:33 +00006077 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006078
6079 #ifdef DUMP_DEBUG_ON_EXIT
6080 if ( __kmp_debug_buf )
6081 __kmp_dump_debug_buffer();
6082 #endif
6083} // __kmp_internal_end_thread
6084
6085// -------------------------------------------------------------------------------------------------
6086// Library registration stuff.
6087
6088static long __kmp_registration_flag = 0;
6089 // Random value used to indicate library initialization.
6090static char * __kmp_registration_str = NULL;
6091 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6092
6093
6094static inline
6095char *
6096__kmp_reg_status_name() {
6097 /*
6098 On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
6099 If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
6100 the name of the registered_lib_env env var cannot be found, because the name will contain a different pid.
6101 */
6102 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6103} // __kmp_reg_status_name
6104
6105
6106void
6107__kmp_register_library_startup(
6108 void
6109) {
6110
6111 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6112 int done = 0;
6113 union {
6114 double dtime;
6115 long ltime;
6116 } time;
6117 #if KMP_OS_WINDOWS
6118 __kmp_initialize_system_tick();
6119 #endif
6120 __kmp_read_system_time( & time.dtime );
6121 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6122 __kmp_registration_str =
6123 __kmp_str_format(
6124 "%p-%lx-%s",
6125 & __kmp_registration_flag,
6126 __kmp_registration_flag,
6127 KMP_LIBRARY_FILE
6128 );
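 // The resulting value has the form "<flag address>-<flag value>-<library file>",
 // e.g. something like "0x7f...-cafe1234-libomp.so" (exact values vary per process);
 // another copy of the runtime parses a value of this form (see the loop below)
 // to decide whether the copy that wrote it is still alive.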
6129
6130 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6131
6132 while ( ! done ) {
6133
6134 char * value = NULL; // Actual value of the environment variable.
6135
6136 // Set the environment variable, but do not overwrite it if it already exists.
6137 __kmp_env_set( name, __kmp_registration_str, 0 );
6138 // Check that the variable was actually written.
6139 value = __kmp_env_get( name );
6140 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6141
6142 done = 1; // Ok, environment variable set successfully, exit the loop.
6143
6144 } else {
6145
6146 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6147 // Check whether it is alive or dead.
6148 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6149 char * tail = value;
6150 char * flag_addr_str = NULL;
6151 char * flag_val_str = NULL;
6152 char const * file_name = NULL;
6153 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6154 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6155 file_name = tail;
6156 if ( tail != NULL ) {
6157 long * flag_addr = 0;
6158 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006159 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6160 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006161 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6162 // First, check whether environment-encoded address is mapped into addr space.
6163 // If so, dereference it to see if it still has the right value.
6164
6165 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6166 neighbor = 1;
6167 } else {
6168 // If not, then we know the other copy of the library is no longer running.
6169 neighbor = 2;
6170 }; // if
6171 }; // if
6172 }; // if
6173 switch ( neighbor ) {
6174 case 0 : // Cannot parse environment variable -- neighbor status unknown.
6175 // Assume it is an incompatible format from a future version of the library.
6176 // Assume the other library is alive.
6177 // WARN( ... ); // TODO: Issue a warning.
6178 file_name = "unknown library";
6179 // Attention! Falling through to the next case. That's intentional.
6180 case 1 : { // Neighbor is alive.
6181 // Check it is allowed.
6182 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6183 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6184 // That's not allowed. Issue fatal error.
6185 __kmp_msg(
6186 kmp_ms_fatal,
6187 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6188 KMP_HNT( DuplicateLibrary ),
6189 __kmp_msg_null
6190 );
6191 }; // if
6192 KMP_INTERNAL_FREE( duplicate_ok );
6193 __kmp_duplicate_library_ok = 1;
6194 done = 1; // Exit the loop.
6195 } break;
6196 case 2 : { // Neighbor is dead.
6197 // Clear the variable and try to register library again.
6198 __kmp_env_unset( name );
6199 } break;
6200 default : {
6201 KMP_DEBUG_ASSERT( 0 );
6202 } break;
6203 }; // switch
6204
6205 }; // if
6206 KMP_INTERNAL_FREE( (void *) value );
6207
6208 }; // while
6209 KMP_INTERNAL_FREE( (void *) name );
6210
6211} // func __kmp_register_library_startup
6212
6213
6214void
6215__kmp_unregister_library( void ) {
6216
6217 char * name = __kmp_reg_status_name();
6218 char * value = __kmp_env_get( name );
6219
6220 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6221 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6222 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6223 // Ok, this is our variable. Delete it.
6224 __kmp_env_unset( name );
6225 }; // if
6226
6227 KMP_INTERNAL_FREE( __kmp_registration_str );
6228 KMP_INTERNAL_FREE( value );
6229 KMP_INTERNAL_FREE( name );
6230
6231 __kmp_registration_flag = 0;
6232 __kmp_registration_str = NULL;
6233
6234} // __kmp_unregister_library
6235
6236
6237// End of Library registration stuff.
6238// -------------------------------------------------------------------------------------------------
6239
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006240#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6241
6242static void __kmp_check_mic_type()
6243{
6244 kmp_cpuid_t cpuid_state = {0};
6245 kmp_cpuid_t * cs_p = &cpuid_state;
Jonathan Peyton7be075332015-06-22 15:53:50 +00006246 __kmp_x86_cpuid(1, 0, cs_p);
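 // CPUID leaf 1 reports stepping/model/family in EAX (stepping: bits 0-3, model: 4-7,
 // family: 8-11, extended model: 16-19); the masks below pick out the family/model
 // bits that identify KNC (0xB10) and, presumably, KNL (0x50670).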
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006247 // We don't support mic1 at the moment
6248 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6249 __kmp_mic_type = mic2;
6250 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6251 __kmp_mic_type = mic3;
6252 } else {
6253 __kmp_mic_type = non_mic;
6254 }
6255}
6256
6257#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
6258
Jim Cownie5e8470a2013-09-27 10:38:44 +00006259static void
6260__kmp_do_serial_initialize( void )
6261{
6262 int i, gtid;
6263 int size;
6264
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006265 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006266
6267 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6268 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6269 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6270 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6271 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6272
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006273#if OMPT_SUPPORT
6274 ompt_pre_init();
6275#endif
6276
Jim Cownie5e8470a2013-09-27 10:38:44 +00006277 __kmp_validate_locks();
6278
6279 /* Initialize internal memory allocator */
6280 __kmp_init_allocator();
6281
6282 /* Register the library startup via an environment variable
6283 and check to see whether another copy of the library is already
6284 registered. */
6285
6286 __kmp_register_library_startup( );
6287
6288 /* TODO reinitialization of library */
6289 if( TCR_4(__kmp_global.g.g_done) ) {
6290 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6291 }
6292
6293 __kmp_global.g.g_abort = 0;
6294 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6295
6296 /* initialize the locks */
6297#if KMP_USE_ADAPTIVE_LOCKS
6298#if KMP_DEBUG_ADAPTIVE_LOCKS
6299 __kmp_init_speculative_stats();
6300#endif
6301#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006302#if KMP_STATS_ENABLED
6303 __kmp_init_tas_lock( & __kmp_stats_lock );
6304#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006305 __kmp_init_lock( & __kmp_global_lock );
6306 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6307 __kmp_init_lock( & __kmp_debug_lock );
6308 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6309 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6310 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6311 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6312 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6313 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6314 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6315 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6316 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6317 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6318 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6319 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6320 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6321 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6322 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6323 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6324 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6325
6326 /* conduct initialization and initial setup of configuration */
6327
6328 __kmp_runtime_initialize();
6329
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006330#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6331 __kmp_check_mic_type();
6332#endif
6333
Jim Cownie5e8470a2013-09-27 10:38:44 +00006334 // Some global variable initialization moved here from kmp_env_initialize()
6335#ifdef KMP_DEBUG
6336 kmp_diag = 0;
6337#endif
6338 __kmp_abort_delay = 0;
6339
6340 // From __kmp_init_dflt_team_nth()
6341 /* assume the entire machine will be used */
6342 __kmp_dflt_team_nth_ub = __kmp_xproc;
6343 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6344 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6345 }
6346 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6347 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6348 }
6349 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006350
6351 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6352 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6353 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6354 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6355 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6356 __kmp_library = library_throughput;
6357 // From KMP_SCHEDULE initialization
6358 __kmp_static = kmp_sch_static_balanced;
6359 // AC: do not use analytical here, because it is non-monotonic
6360 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006361 //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006362 // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method
6363 // control parts
6364 #if KMP_FAST_REDUCTION_BARRIER
6365 #define kmp_reduction_barrier_gather_bb ((int)1)
6366 #define kmp_reduction_barrier_release_bb ((int)1)
6367 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6368 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6369 #endif // KMP_FAST_REDUCTION_BARRIER
6370 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6371 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6372 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6373 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6374 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6375 #if KMP_FAST_REDUCTION_BARRIER
6376 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6377 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6378 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6379 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6380 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6381 }
6382 #endif // KMP_FAST_REDUCTION_BARRIER
6383 }
6384 #if KMP_FAST_REDUCTION_BARRIER
6385 #undef kmp_reduction_barrier_release_pat
6386 #undef kmp_reduction_barrier_gather_pat
6387 #undef kmp_reduction_barrier_release_bb
6388 #undef kmp_reduction_barrier_gather_bb
6389 #endif // KMP_FAST_REDUCTION_BARRIER
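    // The branch bits chosen above give the fan-out of the tree/hyper barriers (a value of b
    // means a branching factor of 2^b), while the pattern selects the barrier algorithm
    // (linear, tree, hyper, or hierarchical). These defaults can normally be overridden with
    // the KMP_{PLAIN,FORKJOIN,REDUCTION}_BARRIER and *_BARRIER_PATTERN environment variables
    // handled in kmp_settings.c; e.g. KMP_FORKJOIN_BARRIER="2,2" asks for a 4-way fan-out.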
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006390#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006391 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006392 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006393 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006394 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6395 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6396 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6397 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006398#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006399 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006400 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6401 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6402 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006403#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006404#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006405
6406 // From KMP_CHECKS initialization
6407#ifdef KMP_DEBUG
6408 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6409#else
6410 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6411#endif
6412
6413 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6414 __kmp_foreign_tp = TRUE;
6415
6416 __kmp_global.g.g_dynamic = FALSE;
6417 __kmp_global.g.g_dynamic_mode = dynamic_default;
6418
6419 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006420
Jim Cownie5e8470a2013-09-27 10:38:44 +00006421 // Print all messages in message catalog for testing purposes.
6422 #ifdef KMP_DEBUG
6423 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6424 if ( __kmp_str_match_true( val ) ) {
6425 kmp_str_buf_t buffer;
6426 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006427 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006428 __kmp_printf( "%s", buffer.str );
6429 __kmp_str_buf_free( & buffer );
6430 }; // if
6431 __kmp_env_free( & val );
6432 #endif
6433
Jim Cownie181b4bb2013-12-23 17:28:57 +00006434 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006435 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6436 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6437
Jim Cownie5e8470a2013-09-27 10:38:44 +00006438 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6439 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6440 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6441 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6442 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6443 __kmp_thread_pool = NULL;
6444 __kmp_thread_pool_insert_pt = NULL;
6445 __kmp_team_pool = NULL;
6446
6447 /* Allocate all of the variable sized records */
6448 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6449 /* Since allocation is cache-aligned, just add extra padding at the end */
6450 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6451 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6452 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6453
6454 /* init thread counts */
6455 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
6456 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
6457 __kmp_all_nth = 0;
6458 __kmp_nth = 0;
6459
6460 /* setup the uber master thread and hierarchy */
6461 gtid = __kmp_register_root( TRUE );
6462 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6463 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6464 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6465
6466 KMP_MB(); /* Flush all pending memory write invalidates. */
6467
6468 __kmp_common_initialize();
6469
6470 #if KMP_OS_UNIX
6471 /* invoke the child fork handler */
6472 __kmp_register_atfork();
6473 #endif
6474
Jonathan Peyton99016992015-05-26 17:32:53 +00006475 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006476 {
6477 /* Invoke the exit handler when the program finishes, only for static library.
6478 For dynamic library, we already have _fini and DllMain.
6479 */
6480 int rc = atexit( __kmp_internal_end_atexit );
6481 if ( rc != 0 ) {
6482 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6483 }; // if
6484 }
6485 #endif
6486
6487 #if KMP_HANDLE_SIGNALS
6488 #if KMP_OS_UNIX
6489 /* NOTE: make sure that this is called before the user installs
6490 * their own signal handlers so that the user handlers
6491     * are called first. This way they can return false,
6492 * not call our handler, avoid terminating the library,
6493 * and continue execution where they left off. */
6494 __kmp_install_signals( FALSE );
6495 #endif /* KMP_OS_UNIX */
6496 #if KMP_OS_WINDOWS
6497 __kmp_install_signals( TRUE );
6498 #endif /* KMP_OS_WINDOWS */
6499 #endif
6500
6501 /* we have finished the serial initialization */
6502 __kmp_init_counter ++;
6503
6504 __kmp_init_serial = TRUE;
6505
6506 if (__kmp_settings) {
6507 __kmp_env_print();
6508 }
6509
6510#if OMP_40_ENABLED
6511 if (__kmp_display_env || __kmp_display_env_verbose) {
6512 __kmp_env_print_2();
6513 }
6514#endif // OMP_40_ENABLED
6515
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006516#if OMPT_SUPPORT
6517 ompt_post_init();
6518#endif
6519
Jim Cownie5e8470a2013-09-27 10:38:44 +00006520 KMP_MB();
6521
6522 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6523}
6524
6525void
6526__kmp_serial_initialize( void )
6527{
6528 if ( __kmp_init_serial ) {
6529 return;
6530 }
6531 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6532 if ( __kmp_init_serial ) {
6533 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6534 return;
6535 }
6536 __kmp_do_serial_initialize();
6537 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6538}
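// __kmp_serial_initialize() uses the usual double-checked pattern: an unsynchronized read of
// __kmp_init_serial as the fast path, then a re-check under __kmp_initz_lock so that only one
// thread ever runs __kmp_do_serial_initialize(). The middle and parallel initializers below
// follow the same scheme.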
6539
6540static void
6541__kmp_do_middle_initialize( void )
6542{
6543 int i, j;
6544 int prev_dflt_team_nth;
6545
6546 if( !__kmp_init_serial ) {
6547 __kmp_do_serial_initialize();
6548 }
6549
6550 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6551
6552 //
6553 // Save the previous value for the __kmp_dflt_team_nth so that
6554 // we can avoid some reinitialization if it hasn't changed.
6555 //
6556 prev_dflt_team_nth = __kmp_dflt_team_nth;
6557
Alp Toker98758b02014-03-02 04:12:06 +00006558#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006559 //
6560 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6561 // number of cores on the machine.
6562 //
6563 __kmp_affinity_initialize();
6564
6565 //
6566 // Run through the __kmp_threads array and set the affinity mask
6567 // for each root thread that is currently registered with the RTL.
6568 //
6569 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6570 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6571 __kmp_affinity_set_init_mask( i, TRUE );
6572 }
6573 }
Alp Toker98758b02014-03-02 04:12:06 +00006574#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006575
6576 KMP_ASSERT( __kmp_xproc > 0 );
6577 if ( __kmp_avail_proc == 0 ) {
6578 __kmp_avail_proc = __kmp_xproc;
6579 }
6580
6581 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
6582 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006583 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006584 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6585 j++;
6586 }
6587
6588 if ( __kmp_dflt_team_nth == 0 ) {
6589#ifdef KMP_DFLT_NTH_CORES
6590 //
6591 // Default #threads = #cores
6592 //
6593 __kmp_dflt_team_nth = __kmp_ncores;
6594 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6595 __kmp_dflt_team_nth ) );
6596#else
6597 //
6598 // Default #threads = #available OS procs
6599 //
6600 __kmp_dflt_team_nth = __kmp_avail_proc;
6601 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6602 __kmp_dflt_team_nth ) );
6603#endif /* KMP_DFLT_NTH_CORES */
6604 }
6605
6606 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6607 __kmp_dflt_team_nth = KMP_MIN_NTH;
6608 }
6609 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6610 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6611 }
6612
6613 //
6614 // There's no harm in continuing if the following check fails,
6615 // but it indicates an error in the previous logic.
6616 //
6617 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6618
6619 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6620 //
6621 // Run through the __kmp_threads array and set the num threads icv
6622 // for each root thread that is currently registered with the RTL
6623 // (which has not already explicitly set its nthreads-var with a
6624 // call to omp_set_num_threads()).
6625 //
6626 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6627 kmp_info_t *thread = __kmp_threads[ i ];
6628 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006629 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006630
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006631 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006632 }
6633 }
6634 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6635 __kmp_dflt_team_nth) );
6636
6637#ifdef KMP_ADJUST_BLOCKTIME
6638 /* Adjust blocktime to zero if necessary */
6639 /* now that __kmp_avail_proc is set */
6640 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6641 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6642 if ( __kmp_nth > __kmp_avail_proc ) {
6643 __kmp_zero_bt = TRUE;
6644 }
6645 }
6646#endif /* KMP_ADJUST_BLOCKTIME */
6647
6648 /* we have finished middle initialization */
6649 TCW_SYNC_4(__kmp_init_middle, TRUE);
6650
6651 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6652}
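// Middle initialization is where affinity (when supported) is bound to the registered root
// threads and where __kmp_dflt_team_nth -- the default team size -- is finally resolved from
// the core or processor count if the environment did not already set it.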
6653
6654void
6655__kmp_middle_initialize( void )
6656{
6657 if ( __kmp_init_middle ) {
6658 return;
6659 }
6660 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6661 if ( __kmp_init_middle ) {
6662 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6663 return;
6664 }
6665 __kmp_do_middle_initialize();
6666 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6667}
6668
6669void
6670__kmp_parallel_initialize( void )
6671{
6672 int gtid = __kmp_entry_gtid(); // this might be a new root
6673
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006674 /* synchronize parallel initialization (for sibling) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006675 if( TCR_4(__kmp_init_parallel) ) return;
6676 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6677 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6678
6679 /* TODO reinitialization after we have already shut down */
6680 if( TCR_4(__kmp_global.g.g_done) ) {
6681 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6682 __kmp_infinite_loop();
6683 }
6684
6685 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6686 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6687 */
6688 if( !__kmp_init_middle ) {
6689 __kmp_do_middle_initialize();
6690 }
6691
6692 /* begin initialization */
6693 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6694 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6695
6696#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6697 //
6698 // Save the FP control regs.
6699 // Worker threads will set theirs to these values at thread startup.
6700 //
6701 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6702 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6703 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6704#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6705
6706#if KMP_OS_UNIX
6707# if KMP_HANDLE_SIGNALS
6708 /* must be after __kmp_serial_initialize */
6709 __kmp_install_signals( TRUE );
6710# endif
6711#endif
6712
6713 __kmp_suspend_initialize();
6714
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006715#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006716 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6717 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6718 }
6719#else
6720 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6721 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6722 }
6723#endif
6724
6725 if ( __kmp_version ) {
6726 __kmp_print_version_2();
6727 }
6728
Jim Cownie5e8470a2013-09-27 10:38:44 +00006729 /* we have finished parallel initialization */
6730 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6731
6732 KMP_MB();
6733 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6734
6735 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6736}
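// With USE_LOAD_BALANCE compiled in, dynamic_default resolves to the load-balance heuristic
// implemented by __kmp_load_balance_nproc() further below; otherwise it falls back to the
// simpler thread-limit mode.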
6737
6738
6739/* ------------------------------------------------------------------------ */
6740
6741void
6742__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6743 kmp_team_t *team )
6744{
6745 kmp_disp_t *dispatch;
6746
6747 KMP_MB();
6748
6749 /* none of the threads have encountered any constructs, yet. */
6750 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006751#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006752 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006753#endif /* KMP_CACHE_MANAGE */
6754 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6755 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006756 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6757 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006758
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006759 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peyton71909c52016-03-02 22:42:06 +00006760#if OMP_41_ENABLED
6761 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6762#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006763 if( __kmp_env_consistency_check )
6764 __kmp_push_parallel( gtid, team->t.t_ident );
6765
6766 KMP_MB(); /* Flush all pending memory write invalidates. */
6767}
6768
6769void
6770__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6771 kmp_team_t *team )
6772{
6773 if( __kmp_env_consistency_check )
6774 __kmp_pop_parallel( gtid, team->t.t_ident );
6775}
6776
6777int
6778__kmp_invoke_task_func( int gtid )
6779{
6780 int rc;
6781 int tid = __kmp_tid_from_gtid( gtid );
6782 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006783 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006784
6785 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6786#if USE_ITT_BUILD
6787 if ( __itt_stack_caller_create_ptr ) {
6788 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6789 }
6790#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006791#if INCLUDE_SSC_MARKS
6792 SSC_MARK_INVOKING();
6793#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006794
6795#if OMPT_SUPPORT
6796 void *dummy;
6797 void **exit_runtime_p;
6798 ompt_task_id_t my_task_id;
6799 ompt_parallel_id_t my_parallel_id;
6800
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006801 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006802 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6803 ompt_task_info.frame.exit_runtime_frame);
6804 } else {
6805 exit_runtime_p = &dummy;
6806 }
6807
6808#if OMPT_TRACE
6809 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6810 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006811 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006812 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6813 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6814 my_parallel_id, my_task_id);
6815 }
6816#endif
6817#endif
6818
Jonathan Peyton45be4502015-08-11 21:36:41 +00006819 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006820 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6821 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006822 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6823 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006824#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006825 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006826#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006827 );
6828 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006829
Jim Cownie5e8470a2013-09-27 10:38:44 +00006830#if USE_ITT_BUILD
6831 if ( __itt_stack_caller_create_ptr ) {
6832 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6833 }
6834#endif /* USE_ITT_BUILD */
6835 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6836
6837 return rc;
6838}
6839
6840#if OMP_40_ENABLED
6841void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006842__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006843{
6844    // This routine is called by all master threads in a teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006845 kmp_info_t *thr = __kmp_threads[ gtid ];
6846 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006847 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006848 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6849 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6850 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006851 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006852 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006853    // Launch the league of teams now, but do not let the workers execute
6854    // (they wait on the fork barrier until the next parallel region)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006855#if INCLUDE_SSC_MARKS
6856 SSC_MARK_FORKING();
6857#endif
6858 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006859 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006860#if OMPT_SUPPORT
6861 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6862#endif
6863 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006864 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6865 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006866#if INCLUDE_SSC_MARKS
6867 SSC_MARK_JOINING();
6868#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006869
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006870 // AC: last parameter "1" eliminates join barrier which won't work because
6871 // worker threads are in a fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006872 __kmp_join_call( loc, gtid
6873#if OMPT_SUPPORT
6874 , fork_context_intel
6875#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006876 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006877}
6878
6879int
6880__kmp_invoke_teams_master( int gtid )
6881{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006882 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6883 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006884 #if KMP_DEBUG
6885 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6886 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6887 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006888 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6889 __kmp_teams_master( gtid );
6890 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006891 return 1;
6892}
6893#endif /* OMP_40_ENABLED */
6894
6895/* this sets the requested number of threads for the next parallel region
6896 * encountered by this team */
6897/* since this should be enclosed in the forkjoin critical section it
6898  * should avoid race conditions with asymmetrical nested parallelism */
6899
6900void
6901__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6902{
6903 kmp_info_t *thr = __kmp_threads[gtid];
6904
6905 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006906 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006907}
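// A sketch of how this path is typically reached (assuming the compiler-facing wrapper
// __kmpc_push_num_threads in kmp_csupport.c): for user code such as
//
//     #pragma omp parallel num_threads(4)
//     { ... }
//
// the generated call arrives here with num_threads == 4, and the value stored in th_set_nproc
// is consumed by the next fork.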
6908
6909#if OMP_40_ENABLED
6910
6911/* this sets the requested number of teams for the teams region and/or
6912 * the number of threads for the next parallel region encountered */
6913void
6914__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6915{
6916 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006917 KMP_DEBUG_ASSERT(num_teams >= 0);
6918 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006919
6920 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006921 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006922    if( num_teams > __kmp_max_nth ) { // are too many teams requested?
6923 if ( !__kmp_reserve_warn ) {
6924 __kmp_reserve_warn = 1;
6925 __kmp_msg(
6926 kmp_ms_warning,
6927 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6928 KMP_HNT( Unset_ALL_THREADS ),
6929 __kmp_msg_null
6930 );
6931 }
6932 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006933 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006934 // Set number of teams (number of threads in the outer "parallel" of the teams)
6935 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6936
6937 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006938 if( num_threads == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006939 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006940 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006941 num_threads = __kmp_avail_proc / num_teams;
6942 if( num_teams * num_threads > __kmp_max_nth ) {
6943 // adjust num_threads w/o warning as it is not user setting
6944 num_threads = __kmp_max_nth / num_teams;
6945 }
6946 } else {
6947 if( num_teams * num_threads > __kmp_max_nth ) {
6948 int new_threads = __kmp_max_nth / num_teams;
6949 if ( !__kmp_reserve_warn ) { // user asked for too many threads
6950 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
6951 __kmp_msg(
6952 kmp_ms_warning,
6953 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6954 KMP_HNT( Unset_ALL_THREADS ),
6955 __kmp_msg_null
6956 );
6957 }
6958 num_threads = new_threads;
6959 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006960 }
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006961 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006962}
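// Illustrative mapping (assuming the usual __kmpc_push_num_teams wrapper): a construct such as
//
//     #pragma omp teams num_teams(8) thread_limit(4)
//
// reaches this routine with num_teams == 8 and num_threads == 4. A zero argument means the
// clause was absent, and the corresponding default is derived above from __kmp_avail_proc and
// __kmp_max_nth.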
6963
6964
6965//
6966// Set the proc_bind var to use in the following parallel region.
6967//
6968void
6969__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
6970{
6971 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006972 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006973}
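// This mirrors the proc_bind clause, e.g. "#pragma omp parallel proc_bind(spread)" stores
// proc_bind_spread here, and the next fork consumes it when partitioning places.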
6974
6975#endif /* OMP_40_ENABLED */
6976
6977/* Launch the worker threads into the microtask. */
6978
6979void
6980__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
6981{
6982 kmp_info_t *this_thr = __kmp_threads[gtid];
6983
6984#ifdef KMP_DEBUG
6985 int f;
6986#endif /* KMP_DEBUG */
6987
6988 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006989 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006990 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
6991 KMP_MB(); /* Flush all pending memory write invalidates. */
6992
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006993 team->t.t_construct = 0; /* no single directives seen yet */
6994 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006995
6996 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006997 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006998 if ( team->t.t_max_nproc > 1 ) {
6999 int i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00007000 for (i = 0; i < KMP_MAX_DISP_BUF; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007001 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00007002#if OMP_41_ENABLED
7003 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7004#endif
7005 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007006 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007007 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00007008#if OMP_41_ENABLED
7009 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7010#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007011 }
7012
7013 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007014 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007015
7016#ifdef KMP_DEBUG
7017 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7018 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7019 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7020 }
7021#endif /* KMP_DEBUG */
7022
7023 /* release the worker threads so they may begin working */
7024 __kmp_fork_barrier( gtid, 0 );
7025}
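// The KMP_MAX_DISP_BUF dispatch buffers reset above are cycled as the team moves through
// dynamically scheduled (nowait) loops, so threads may be working in different loop instances
// at the same time without clobbering each other's dispatch state.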
7026
7027
7028void
7029__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7030{
7031 kmp_info_t *this_thr = __kmp_threads[gtid];
7032
7033 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007034 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007035 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7036 KMP_MB(); /* Flush all pending memory write invalidates. */
7037
7038 /* Join barrier after fork */
7039
7040#ifdef KMP_DEBUG
7041 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7042 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7043 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7044 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7045 __kmp_print_structure();
7046 }
7047 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7048 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7049#endif /* KMP_DEBUG */
7050
7051 __kmp_join_barrier( gtid ); /* wait for everyone */
7052
7053 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007054 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007055}
7056
7057
7058/* ------------------------------------------------------------------------ */
7059/* ------------------------------------------------------------------------ */
7060
7061#ifdef USE_LOAD_BALANCE
7062
7063//
7064// Return the worker threads actively spinning in the hot team, if we
7065// are at the outermost level of parallelism. Otherwise, return 0.
7066//
7067static int
7068__kmp_active_hot_team_nproc( kmp_root_t *root )
7069{
7070 int i;
7071 int retval;
7072 kmp_team_t *hot_team;
7073
7074 if ( root->r.r_active ) {
7075 return 0;
7076 }
7077 hot_team = root->r.r_hot_team;
7078 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7079 return hot_team->t.t_nproc - 1; // Don't count master thread
7080 }
7081
7082 //
7083 // Skip the master thread - it is accounted for elsewhere.
7084 //
7085 retval = 0;
7086 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7087 if ( hot_team->t.t_threads[i]->th.th_active ) {
7088 retval++;
7089 }
7090 }
7091 return retval;
7092}
7093
7094//
7095// Perform an automatic adjustment to the number of
7096// threads used by the next parallel region.
7097//
7098static int
7099__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7100{
7101 int retval;
7102 int pool_active;
7103 int hot_team_active;
7104 int team_curr_active;
7105 int system_active;
7106
7107 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7108 root, set_nproc ) );
7109 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007110 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007111 KMP_DEBUG_ASSERT( set_nproc > 1 );
7112
7113 if ( set_nproc == 1) {
7114 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7115 return 1;
7116 }
7117
7118 //
7119 // Threads that are active in the thread pool, active in the hot team
7120 // for this particular root (if we are at the outer par level), and
7121 // the currently executing thread (to become the master) are available
7122 // to add to the new team, but are currently contributing to the system
7123 // load, and must be accounted for.
7124 //
7125 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7126 hot_team_active = __kmp_active_hot_team_nproc( root );
7127 team_curr_active = pool_active + hot_team_active + 1;
7128
7129 //
7130 // Check the system load.
7131 //
7132 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7133 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7134 system_active, pool_active, hot_team_active ) );
7135
7136 if ( system_active < 0 ) {
7137 //
7138 // There was an error reading the necessary info from /proc,
7139 // so use the thread limit algorithm instead. Once we set
7140 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7141 // we shouldn't wind up getting back here.
7142 //
7143 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7144 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7145
7146 //
7147 // Make this call behave like the thread limit algorithm.
7148 //
7149 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7150 : root->r.r_hot_team->t.t_nproc);
7151 if ( retval > set_nproc ) {
7152 retval = set_nproc;
7153 }
7154 if ( retval < KMP_MIN_NTH ) {
7155 retval = KMP_MIN_NTH;
7156 }
7157
7158 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7159 return retval;
7160 }
7161
7162 //
7163 // There is a slight delay in the load balance algorithm in detecting
7164 // new running procs. The real system load at this instant should be
7165    // at least as large as the number of active OMP threads that are available to
7166 // add to the team.
7167 //
7168 if ( system_active < team_curr_active ) {
7169 system_active = team_curr_active;
7170 }
7171 retval = __kmp_avail_proc - system_active + team_curr_active;
7172 if ( retval > set_nproc ) {
7173 retval = set_nproc;
7174 }
7175 if ( retval < KMP_MIN_NTH ) {
7176 retval = KMP_MIN_NTH;
7177 }
7178
7179 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7180 return retval;
7181} // __kmp_load_balance_nproc()
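// Worked example of the formula above (numbers purely illustrative): with __kmp_avail_proc == 8,
// system_active == 5 and team_curr_active == 3, the routine proposes 8 - 5 + 3 == 6 threads,
// which is then clamped into the range [KMP_MIN_NTH, set_nproc].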
7182
7183#endif /* USE_LOAD_BALANCE */
7184
Jim Cownie5e8470a2013-09-27 10:38:44 +00007185/* ------------------------------------------------------------------------ */
7186/* ------------------------------------------------------------------------ */
7187
7188/* NOTE: this is called with the __kmp_init_lock held */
7189void
7190__kmp_cleanup( void )
7191{
7192 int f;
7193
7194 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7195
7196 if (TCR_4(__kmp_init_parallel)) {
7197#if KMP_HANDLE_SIGNALS
7198 __kmp_remove_signals();
7199#endif
7200 TCW_4(__kmp_init_parallel, FALSE);
7201 }
7202
7203 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007204#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007205 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007206#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007207 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007208 TCW_4(__kmp_init_middle, FALSE);
7209 }
7210
7211 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7212
7213 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007214 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007215 __kmp_init_serial = FALSE;
7216 }
7217
7218 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7219 if ( __kmp_root[ f ] != NULL ) {
7220 __kmp_free( __kmp_root[ f ] );
7221 __kmp_root[ f ] = NULL;
7222 }
7223 }
7224 __kmp_free( __kmp_threads );
7225 // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in
7226 // freeing __kmp_root.
7227 __kmp_threads = NULL;
7228 __kmp_root = NULL;
7229 __kmp_threads_capacity = 0;
7230
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007231#if KMP_USE_DYNAMIC_LOCK
7232 __kmp_cleanup_indirect_user_locks();
7233#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007234 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007235#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007236
Alp Toker98758b02014-03-02 04:12:06 +00007237 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007238 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7239 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007240 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007241
7242 #if KMP_USE_ADAPTIVE_LOCKS
7243 #if KMP_DEBUG_ADAPTIVE_LOCKS
7244 __kmp_print_speculative_stats();
7245 #endif
7246 #endif
7247 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7248 __kmp_nested_nth.nth = NULL;
7249 __kmp_nested_nth.size = 0;
7250 __kmp_nested_nth.used = 0;
7251
7252 __kmp_i18n_catclose();
7253
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007254#if KMP_STATS_ENABLED
7255 __kmp_accumulate_stats_at_exit();
7256 __kmp_stats_list.deallocate();
7257#endif
7258
Jim Cownie5e8470a2013-09-27 10:38:44 +00007259 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7260}
7261
7262/* ------------------------------------------------------------------------ */
7263/* ------------------------------------------------------------------------ */
7264
7265int
7266__kmp_ignore_mppbeg( void )
7267{
7268 char *env;
7269
7270 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7271 if (__kmp_str_match_false( env ))
7272 return FALSE;
7273 }
7274 // By default __kmpc_begin() is no-op.
7275 return TRUE;
7276}
7277
7278int
7279__kmp_ignore_mppend( void )
7280{
7281 char *env;
7282
7283 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7284 if (__kmp_str_match_false( env ))
7285 return FALSE;
7286 }
7287 // By default __kmpc_end() is no-op.
7288 return TRUE;
7289}
7290
7291void
7292__kmp_internal_begin( void )
7293{
7294 int gtid;
7295 kmp_root_t *root;
7296
7297 /* this is a very important step as it will register new sibling threads
7298 * and assign these new uber threads a new gtid */
7299 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007300 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007301 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7302
7303 if( root->r.r_begin ) return;
7304 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7305 if( root->r.r_begin ) {
7306 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7307 return;
7308 }
7309
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007310 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007311
7312 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7313}
7314
7315
7316/* ------------------------------------------------------------------------ */
7317/* ------------------------------------------------------------------------ */
7318
7319void
7320__kmp_user_set_library (enum library_type arg)
7321{
7322 int gtid;
7323 kmp_root_t *root;
7324 kmp_info_t *thread;
7325
7326 /* first, make sure we are initialized so we can get our gtid */
7327
7328 gtid = __kmp_entry_gtid();
7329 thread = __kmp_threads[ gtid ];
7330
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007331 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007332
7333 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7334 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7335 KMP_WARNING( SetLibraryIncorrectCall );
7336 return;
7337 }
7338
7339 switch ( arg ) {
7340 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007341 thread->th.th_set_nproc = 0;
7342 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007343 break;
7344 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007345 thread->th.th_set_nproc = 0;
7346 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007347 break;
7348 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007349 thread->th.th_set_nproc = 0;
7350 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007351 break;
7352 default:
7353 KMP_FATAL( UnknownLibraryType, arg );
7354 }
7355
7356 __kmp_aux_set_library ( arg );
7357}
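// Backing routine for the kmp_set_library*() entry points (see kmp_ftn_entry.h); e.g. a user
// call to kmp_set_library_throughput() is expected to land here with arg == library_throughput.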
7358
7359void
7360__kmp_aux_set_stacksize( size_t arg )
7361{
7362 if (! __kmp_init_serial)
7363 __kmp_serial_initialize();
7364
7365#if KMP_OS_DARWIN
7366 if (arg & (0x1000 - 1)) {
7367 arg &= ~(0x1000 - 1);
7368 if(arg + 0x1000) /* check for overflow if we round up */
7369 arg += 0x1000;
7370 }
7371#endif
7372 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7373
7374 /* only change the default stacksize before the first parallel region */
7375 if (! TCR_4(__kmp_init_parallel)) {
7376 size_t value = arg; /* argument is in bytes */
7377
7378 if (value < __kmp_sys_min_stksize )
7379 value = __kmp_sys_min_stksize ;
7380 else if (value > KMP_MAX_STKSIZE)
7381 value = KMP_MAX_STKSIZE;
7382
7383 __kmp_stksize = value;
7384
7385 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7386 }
7387
7388 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7389}
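// Backing routine for the kmp_set_stacksize()/kmp_set_stacksize_s() entry points; note that, as
// coded above, the request only takes effect before the first parallel region, i.e. before any
// worker stacks have been created.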
7390
7391/* set the behaviour of the runtime library */
7392/* TODO this can cause some odd behaviour with sibling parallelism... */
7393void
7394__kmp_aux_set_library (enum library_type arg)
7395{
7396 __kmp_library = arg;
7397
7398 switch ( __kmp_library ) {
7399 case library_serial :
7400 {
7401 KMP_INFORM( LibraryIsSerial );
7402 (void) __kmp_change_library( TRUE );
7403 }
7404 break;
7405 case library_turnaround :
7406 (void) __kmp_change_library( TRUE );
7407 break;
7408 case library_throughput :
7409 (void) __kmp_change_library( FALSE );
7410 break;
7411 default:
7412 KMP_FATAL( UnknownLibraryType, arg );
7413 }
7414}
7415
7416/* ------------------------------------------------------------------------ */
7417/* ------------------------------------------------------------------------ */
7418
7419void
7420__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7421{
7422 int blocktime = arg; /* argument is in milliseconds */
7423 int bt_intervals;
7424 int bt_set;
7425
7426 __kmp_save_internal_controls( thread );
7427
7428 /* Normalize and set blocktime for the teams */
7429 if (blocktime < KMP_MIN_BLOCKTIME)
7430 blocktime = KMP_MIN_BLOCKTIME;
7431 else if (blocktime > KMP_MAX_BLOCKTIME)
7432 blocktime = KMP_MAX_BLOCKTIME;
7433
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007434 set__blocktime_team( thread->th.th_team, tid, blocktime );
7435 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007436
7437 /* Calculate and set blocktime intervals for the teams */
7438 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7439
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007440 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7441 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007442
7443 /* Set whether blocktime has been set to "TRUE" */
7444 bt_set = TRUE;
7445
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007446 set__bt_set_team( thread->th.th_team, tid, bt_set );
7447 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007448 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7449 __kmp_gtid_from_tid(tid, thread->th.th_team),
7450 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
7451}
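// Reached from kmp_set_blocktime(); the argument is in milliseconds, mirroring KMP_BLOCKTIME.
// For example, kmp_set_blocktime(0) makes the threads of the calling thread's teams go to sleep
// almost immediately when they run out of work instead of spin-waiting.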
7452
7453void
7454__kmp_aux_set_defaults(
7455 char const * str,
7456 int len
7457) {
7458 if ( ! __kmp_init_serial ) {
7459 __kmp_serial_initialize();
7460 };
7461 __kmp_env_initialize( str );
7462
7463 if (__kmp_settings
7464#if OMP_40_ENABLED
7465 || __kmp_display_env || __kmp_display_env_verbose
7466#endif // OMP_40_ENABLED
7467 ) {
7468 __kmp_env_print();
7469 }
7470} // __kmp_aux_set_defaults
7471
7472/* ------------------------------------------------------------------------ */
7473
7474/*
7475 * internal fast reduction routines
7476 */
7477
Jim Cownie5e8470a2013-09-27 10:38:44 +00007478PACKED_REDUCTION_METHOD_T
7479__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7480 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7481 kmp_critical_name *lck )
7482{
7483
7484 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7485 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7486 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
7487    // Finally, it's up to the OpenMP RTL to make a decision on which method to select among those generated by PAROPT.
7488
7489 PACKED_REDUCTION_METHOD_T retval;
7490
7491 int team_size;
7492
7493 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7494 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7495
7496 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7497 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7498
7499 retval = critical_reduce_block;
7500
7501    team_size = __kmp_get_team_num_threads( global_tid ); // another way of getting the team size ( with 1 dynamic dereference ) is slower
7502
7503 if( team_size == 1 ) {
7504
7505 retval = empty_reduce_block;
7506
7507 } else {
7508
7509 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7510 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7511
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007512 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007513
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007514 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007515
7516 int teamsize_cutoff = 4;
7517
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007518#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7519 if( __kmp_mic_type != non_mic ) {
7520 teamsize_cutoff = 8;
7521 }
7522#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007523 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007524 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007525 if ( atomic_available ) {
7526 retval = atomic_reduce_block;
7527 }
7528 } else {
7529 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7530 }
7531 } else if ( atomic_available ) {
7532 retval = atomic_reduce_block;
7533 }
7534 #else
7535 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007536 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007537
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007538 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH
Jim Cownie5e8470a2013-09-27 10:38:44 +00007539
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007540 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007541
Jim Cownie5e8470a2013-09-27 10:38:44 +00007542 // basic tuning
7543
7544 if( atomic_available ) {
7545 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7546 retval = atomic_reduce_block;
7547 }
7548 } // otherwise: use critical section
7549
7550 #elif KMP_OS_DARWIN
7551
Jim Cownie5e8470a2013-09-27 10:38:44 +00007552 if( atomic_available && ( num_vars <= 3 ) ) {
7553 retval = atomic_reduce_block;
7554 } else if( tree_available ) {
7555 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7556 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7557 }
7558 } // otherwise: use critical section
7559
7560 #else
7561 #error "Unknown or unsupported OS"
7562 #endif
7563
7564 #else
7565 #error "Unknown or unsupported architecture"
7566 #endif
7567
7568 }
7569
Jim Cownie5e8470a2013-09-27 10:38:44 +00007570 // KMP_FORCE_REDUCTION
7571
Andrey Churbanovec23a952015-08-17 10:12:12 +00007572 // If the team is serialized (team_size == 1), ignore the forced reduction
7573 // method and stay with the unsynchronized method (empty_reduce_block)
7574 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007575
7576 PACKED_REDUCTION_METHOD_T forced_retval;
7577
7578 int atomic_available, tree_available;
7579
7580 switch( ( forced_retval = __kmp_force_reduction_method ) )
7581 {
7582 case critical_reduce_block:
7583 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007584 break;
7585
7586 case atomic_reduce_block:
7587 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7588 KMP_ASSERT( atomic_available ); // atomic_available should be != 0
7589 break;
7590
7591 case tree_reduce_block:
7592 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7593 KMP_ASSERT( tree_available ); // tree_available should be != 0
7594 #if KMP_FAST_REDUCTION_BARRIER
7595 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7596 #endif
7597 break;
7598
7599 default:
7600 KMP_ASSERT( 0 ); // "unsupported method specified"
7601 }
7602
7603 retval = forced_retval;
7604 }
7605
7606 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7607
7608 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7609 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7610
7611 return ( retval );
7612}
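// The packed value returned above encodes both the reduction method and, for tree reductions,
// the barrier flavor to use; __kmp_get_reduce_method() below recovers the method by shifting
// away what appear to be the low 8 bits reserved for the barrier type. The choice is driven by
// team size, by whether the compiler emitted atomic/tree reduction code, and by any
// KMP_FORCE_REDUCTION override.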
7613
7614// this function is for testing set/get/determine reduce method
7615kmp_int32
7616__kmp_get_reduce_method( void ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007617 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007618}
7619
7620/* ------------------------------------------------------------------------ */