blob: 5d5f5de13ded6848c90a9e4a1bd17da0d4997009 [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
Jonathan Peytonde4749b2016-12-14 23:01:24 +00002 * kmp_runtime.cpp -- KPTS runtime support library
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_atomic.h"
18#include "kmp_wrapper_getpid.h"
19#include "kmp_environment.h"
20#include "kmp_itt.h"
21#include "kmp_str.h"
22#include "kmp_settings.h"
23#include "kmp_i18n.h"
24#include "kmp_io.h"
25#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000026#include "kmp_stats.h"
27#include "kmp_wait_release.h"
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +000028#include "kmp_affinity.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000029
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000030#if OMPT_SUPPORT
31#include "ompt-specific.h"
32#endif
33
Jim Cownie5e8470a2013-09-27 10:38:44 +000034/* these are temporary issues to be dealt with */
35#define KMP_USE_PRCTL 0
Jim Cownie5e8470a2013-09-27 10:38:44 +000036
Jim Cownie5e8470a2013-09-27 10:38:44 +000037#if KMP_OS_WINDOWS
38#include <process.h>
39#endif
40
Jonas Hahnfeld50fed042016-11-07 15:58:36 +000041#include "tsan_annotations.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000042
43#if defined(KMP_GOMP_COMPAT)
44char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
45#endif /* defined(KMP_GOMP_COMPAT) */
46
47char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
Jonathan Peytone844a542017-03-06 22:07:40 +000048#if OMP_50_ENABLED
49 "5.0 (201611)";
50#elif OMP_45_ENABLED
Jonathan Peyton74f3ffc2016-09-30 15:50:14 +000051 "4.5 (201511)";
52#elif OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +000053 "4.0 (201307)";
Jim Cownie5e8470a2013-09-27 10:38:44 +000054#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +000055 "3.1 (201107)";
Jim Cownie5e8470a2013-09-27 10:38:44 +000056#endif
57
58#ifdef KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +000059char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
Jim Cownie5e8470a2013-09-27 10:38:44 +000060#endif /* KMP_DEBUG */
61
Jim Cownie181b4bb2013-12-23 17:28:57 +000062#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
63
Jim Cownie5e8470a2013-09-27 10:38:44 +000064/* ------------------------------------------------------------------------ */
65/* ------------------------------------------------------------------------ */
66
67kmp_info_t __kmp_monitor;
68
69/* ------------------------------------------------------------------------ */
70/* ------------------------------------------------------------------------ */
71
72/* Forward declarations */
73
74void __kmp_cleanup( void );
75
76static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +000077static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
Jonathan Peyton2321d572015-06-08 19:25:25 +000078#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +000079static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
Jonathan Peyton2321d572015-06-08 19:25:25 +000080#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +000081static void __kmp_do_serial_initialize( void );
Jim Cownie4cc4bb42014-10-07 16:25:50 +000082void __kmp_fork_barrier( int gtid, int tid );
83void __kmp_join_barrier( int gtid );
84void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +000085
Jim Cownie5e8470a2013-09-27 10:38:44 +000086#ifdef USE_LOAD_BALANCE
87static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
88#endif
89
90static int __kmp_expand_threads(int nWish, int nNeed);
Jonathan Peyton2321d572015-06-08 19:25:25 +000091#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +000092static int __kmp_unregister_root_other_thread( int gtid );
Jonathan Peyton2321d572015-06-08 19:25:25 +000093#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +000094static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
95static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
96static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
97
98/* ------------------------------------------------------------------------ */
99/* ------------------------------------------------------------------------ */
100
101/* Calculate the identifier of the current thread */
102/* fast (and somewhat portable) way to get unique */
103/* identifier of executing thread. */
104/* returns KMP_GTID_DNE if we haven't been assigned a gtid */
105
106int
107__kmp_get_global_thread_id( )
108{
109 int i;
110 kmp_info_t **other_threads;
111 size_t stack_data;
112 char *stack_addr;
113 size_t stack_size;
114 char *stack_base;
115
116 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
117 __kmp_nth, __kmp_all_nth ));
118
119 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
120 parallel region, made it return KMP_GTID_DNE to force serial_initialize by
121 caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
122 __kmp_init_gtid for this to work. */
123
124 if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;
125
126#ifdef KMP_TDATA_GTID
127 if ( TCR_4(__kmp_gtid_mode) >= 3) {
128 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
129 return __kmp_gtid;
130 }
131#endif
132 if ( TCR_4(__kmp_gtid_mode) >= 2) {
133 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
134 return __kmp_gtid_get_specific();
135 }
136 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
137
138 stack_addr = (char*) & stack_data;
139 other_threads = __kmp_threads;
140
141 /*
142 ATT: The code below is a source of potential bugs due to unsynchronized access to
143 __kmp_threads array. For example:
144 1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
145 2. Current thread is suspended by OS.
146 3. Another thread unregisters and finishes (debug versions of free() may fill memory
147 with something like 0xEF).
148 4. Current thread is resumed.
149 5. Current thread reads junk from *thr.
150 TODO: Fix it.
151 --ln
152 */
153
154 for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
155
156 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
157 if( !thr ) continue;
158
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000159 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
160 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000161
162 /* stack grows down -- search through all of the active threads */
163
164 if( stack_addr <= stack_base ) {
165 size_t stack_diff = stack_base - stack_addr;
166
167 if( stack_diff <= stack_size ) {
168 /* The only way we can be closer than the allocated */
169 /* stack size is if we are running on this thread. */
170 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
171 return i;
172 }
173 }
174 }
175
176 /* get specific to try and determine our gtid */
177 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
178 "thread, using TLS\n" ));
179 i = __kmp_gtid_get_specific();
180
181 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
182
183 /* if we havn't been assigned a gtid, then return code */
184 if( i<0 ) return i;
185
186 /* dynamically updated stack window for uber threads to avoid get_specific call */
187 if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
188 KMP_FATAL( StackOverflow, i );
189 }
190
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000191 stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000192 if( stack_addr > stack_base ) {
193 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
194 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
195 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
196 } else {
197 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
198 }
199
200 /* Reprint stack bounds for ubermaster since they have been refined */
201 if ( __kmp_storage_map ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000202 char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
203 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000204 __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000205 other_threads[i]->th.th_info.ds.ds_stacksize,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000206 "th_%d stack (refinement)", i );
207 }
208 return i;
209}
210
211int
212__kmp_get_global_thread_id_reg( )
213{
214 int gtid;
215
216 if ( !__kmp_init_serial ) {
217 gtid = KMP_GTID_DNE;
218 } else
219#ifdef KMP_TDATA_GTID
220 if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
221 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
222 gtid = __kmp_gtid;
223 } else
224#endif
225 if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
226 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
227 gtid = __kmp_gtid_get_specific();
228 } else {
229 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
230 gtid = __kmp_get_global_thread_id();
231 }
232
233 /* we must be a new uber master sibling thread */
234 if( gtid == KMP_GTID_DNE ) {
235 KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
236 "Registering a new gtid.\n" ));
237 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
238 if( !__kmp_init_serial ) {
239 __kmp_do_serial_initialize();
240 gtid = __kmp_gtid_get_specific();
241 } else {
242 gtid = __kmp_register_root(FALSE);
243 }
244 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
245 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
246 }
247
248 KMP_DEBUG_ASSERT( gtid >=0 );
249
250 return gtid;
251}
252
253/* caller must hold forkjoin_lock */
254void
255__kmp_check_stack_overlap( kmp_info_t *th )
256{
257 int f;
258 char *stack_beg = NULL;
259 char *stack_end = NULL;
260 int gtid;
261
262 KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
263 if ( __kmp_storage_map ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000264 stack_end = (char *) th->th.th_info.ds.ds_stackbase;
265 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000266
267 gtid = __kmp_gtid_from_thread( th );
268
269 if (gtid == KMP_GTID_MONITOR) {
270 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
271 "th_%s stack (%s)", "mon",
272 ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
273 } else {
274 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
275 "th_%d stack (%s)", gtid,
276 ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
277 }
278 }
279
280 /* No point in checking ubermaster threads since they use refinement and cannot overlap */
Andrey Churbanovbebb5402015-03-03 16:19:57 +0000281 gtid = __kmp_gtid_from_thread( th );
282 if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
Jim Cownie5e8470a2013-09-27 10:38:44 +0000283 {
284 KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
285 if ( stack_beg == NULL ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000286 stack_end = (char *) th->th.th_info.ds.ds_stackbase;
287 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000288 }
289
290 for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
291 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
292
293 if( f_th && f_th != th ) {
294 char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
295 char *other_stack_beg = other_stack_end -
296 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
297 if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
298 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
299
300 /* Print the other stack values before the abort */
301 if ( __kmp_storage_map )
302 __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
303 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
304 "th_%d stack (overlapped)",
305 __kmp_gtid_from_thread( f_th ) );
306
307 __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
308 }
309 }
310 }
311 }
312 KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
313}
314
315
316/* ------------------------------------------------------------------------ */
317
Jim Cownie5e8470a2013-09-27 10:38:44 +0000318/* ------------------------------------------------------------------------ */
319
320void
321__kmp_infinite_loop( void )
322{
323 static int done = FALSE;
324
325 while (! done) {
326 KMP_YIELD( 1 );
327 }
328}
329
330#define MAX_MESSAGE 512
331
332void
333__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
334 char buffer[MAX_MESSAGE];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000335 va_list ap;
336
337 va_start( ap, format);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000338 KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000339 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
340 __kmp_vprintf( kmp_err, buffer, ap );
341#if KMP_PRINT_DATA_PLACEMENT
Jonathan Peyton91b78702015-06-08 19:39:07 +0000342 int node;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000343 if(gtid >= 0) {
344 if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
345 if( __kmp_storage_map_verbose ) {
346 node = __kmp_get_host_node(p1);
347 if(node < 0) /* doesn't work, so don't try this next time */
348 __kmp_storage_map_verbose = FALSE;
349 else {
350 char *last;
351 int lastNode;
352 int localProc = __kmp_get_cpu_from_gtid(gtid);
353
Jonathan Peyton762bc462016-10-26 21:42:48 +0000354 const int page_size = KMP_GET_PAGE_SIZE();
355
356 p1 = (void *)( (size_t)p1 & ~((size_t)page_size - 1) );
357 p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)page_size - 1) );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000358 if(localProc >= 0)
359 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
360 else
361 __kmp_printf_no_lock(" GTID %d\n", gtid);
362# if KMP_USE_PRCTL
363/* The more elaborate format is disabled for now because of the prctl hanging bug. */
364 do {
365 last = p1;
366 lastNode = node;
367 /* This loop collates adjacent pages with the same host node. */
368 do {
Jonathan Peyton762bc462016-10-26 21:42:48 +0000369 (char*)p1 += page_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000370 } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
371 __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
372 (char*)p1 - 1, lastNode);
373 } while(p1 <= p2);
374# else
375 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
Jonathan Peyton762bc462016-10-26 21:42:48 +0000376 (char*)p1 + (page_size - 1), __kmp_get_host_node(p1));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000377 if(p1 < p2) {
378 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
Jonathan Peyton762bc462016-10-26 21:42:48 +0000379 (char*)p2 + (page_size - 1), __kmp_get_host_node(p2));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000380 }
381# endif
382 }
383 }
384 } else
385 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
386 }
387#endif /* KMP_PRINT_DATA_PLACEMENT */
388 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
389}
390
391void
392__kmp_warn( char const * format, ... )
393{
394 char buffer[MAX_MESSAGE];
395 va_list ap;
396
397 if ( __kmp_generate_warnings == kmp_warnings_off ) {
398 return;
399 }
400
401 va_start( ap, format );
402
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000403 KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000404 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
405 __kmp_vprintf( kmp_err, buffer, ap );
406 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
407
408 va_end( ap );
409}
410
411void
412__kmp_abort_process()
413{
414
415 // Later threads may stall here, but that's ok because abort() will kill them.
416 __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
417
418 if ( __kmp_debug_buf ) {
419 __kmp_dump_debug_buffer();
420 }; // if
421
422 if ( KMP_OS_WINDOWS ) {
423 // Let other threads know of abnormal termination and prevent deadlock
424 // if abort happened during library initialization or shutdown
425 __kmp_global.g.g_abort = SIGABRT;
426
427 /*
428 On Windows* OS by default abort() causes pop-up error box, which stalls nightly testing.
429 Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
430 works well, but this function is not available in VS7 (this is not problem for DLL, but
431 it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does
432 not help, at least in some versions of MS C RTL.
433
434 It seems following sequence is the only way to simulate abort() and avoid pop-up error
435 box.
436 */
437 raise( SIGABRT );
438 _exit( 3 ); // Just in case, if signal ignored, exit anyway.
439 } else {
440 abort();
441 }; // if
442
443 __kmp_infinite_loop();
444 __kmp_release_bootstrap_lock( & __kmp_exit_lock );
445
446} // __kmp_abort_process
447
448void
449__kmp_abort_thread( void )
450{
451 // TODO: Eliminate g_abort global variable and this function.
452 // In case of abort just call abort(), it will kill all the threads.
453 __kmp_infinite_loop();
454} // __kmp_abort_thread
455
456/* ------------------------------------------------------------------------ */
457
458/*
459 * Print out the storage map for the major kmp_info_t thread data structures
460 * that are allocated together.
461 */
462
463static void
464__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
465{
466 __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
467
468 __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
469 "th_%d.th_info", gtid );
470
471 __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
472 "th_%d.th_local", gtid );
473
474 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
475 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
476
477 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
478 &thr->th.th_bar[bs_plain_barrier+1],
479 sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
480
481 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
482 &thr->th.th_bar[bs_forkjoin_barrier+1],
483 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
484
485 #if KMP_FAST_REDUCTION_BARRIER
486 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
487 &thr->th.th_bar[bs_reduction_barrier+1],
488 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
489 #endif // KMP_FAST_REDUCTION_BARRIER
490}
491
492/*
493 * Print out the storage map for the major kmp_team_t team data structures
494 * that are allocated together.
495 */
496
497static void
498__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
499{
Jonathan Peyton067325f2016-05-31 19:01:15 +0000500 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000501 __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
502 header, team_id );
503
504 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
505 sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
506
507
508 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
509 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
510
511 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
512 sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
513
514 #if KMP_FAST_REDUCTION_BARRIER
515 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
516 sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
517 #endif // KMP_FAST_REDUCTION_BARRIER
518
519 __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
520 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
521
522 __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
523 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
524
525 __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
526 sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
527 header, team_id );
528
Jim Cownie5e8470a2013-09-27 10:38:44 +0000529
530 __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
531 sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
532}
533
534static void __kmp_init_allocator() {}
535static void __kmp_fini_allocator() {}
Jim Cownie5e8470a2013-09-27 10:38:44 +0000536
537/* ------------------------------------------------------------------------ */
538
Jonathan Peyton99016992015-05-26 17:32:53 +0000539#ifdef KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +0000540# if KMP_OS_WINDOWS
541
Jim Cownie5e8470a2013-09-27 10:38:44 +0000542static void
543__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
544 // TODO: Change to __kmp_break_bootstrap_lock().
545 __kmp_init_bootstrap_lock( lck ); // make the lock released
546}
547
548static void
549__kmp_reset_locks_on_process_detach( int gtid_req ) {
550 int i;
551 int thread_count;
552
553 // PROCESS_DETACH is expected to be called by a thread
554 // that executes ProcessExit() or FreeLibrary().
555 // OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
556 // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
557 // However, in fact, some threads can be still alive here, although being about to be terminated.
558 // The threads in the array with ds_thread==0 are most suspicious.
559 // Actually, it can be not safe to access the __kmp_threads[].
560
561 // TODO: does it make sense to check __kmp_roots[] ?
562
563 // Let's check that there are no other alive threads registered with the OMP lib.
564 while( 1 ) {
565 thread_count = 0;
566 for( i = 0; i < __kmp_threads_capacity; ++i ) {
567 if( !__kmp_threads ) continue;
568 kmp_info_t* th = __kmp_threads[ i ];
569 if( th == NULL ) continue;
570 int gtid = th->th.th_info.ds.ds_gtid;
571 if( gtid == gtid_req ) continue;
572 if( gtid < 0 ) continue;
573 DWORD exit_val;
574 int alive = __kmp_is_thread_alive( th, &exit_val );
575 if( alive ) {
576 ++thread_count;
577 }
578 }
579 if( thread_count == 0 ) break; // success
580 }
581
582 // Assume that I'm alone.
583
584 // Now it might be probably safe to check and reset locks.
585 // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
586 __kmp_reset_lock( &__kmp_forkjoin_lock );
587 #ifdef KMP_DEBUG
588 __kmp_reset_lock( &__kmp_stdio_lock );
589 #endif // KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +0000590}
591
592BOOL WINAPI
593DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
594 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
595
596 switch( fdwReason ) {
597
598 case DLL_PROCESS_ATTACH:
599 KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));
600
601 return TRUE;
602
603 case DLL_PROCESS_DETACH:
604 KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
605 __kmp_gtid_get_specific() ));
606
607 if( lpReserved != NULL )
608 {
609 // lpReserved is used for telling the difference:
610 // lpReserved == NULL when FreeLibrary() was called,
611 // lpReserved != NULL when the process terminates.
612 // When FreeLibrary() is called, worker threads remain alive.
613 // So they will release the forkjoin lock by themselves.
614 // When the process terminates, worker threads disappear triggering
615 // the problem of unreleased forkjoin lock as described below.
616
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +0000617 // A worker thread can take the forkjoin lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000618 // The problem comes up if that worker thread becomes dead
619 // before it releases the forkjoin lock.
620 // The forkjoin lock remains taken, while the thread
621 // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
622 // will try to take the forkjoin lock and will always fail,
623 // so that the application will never finish [normally].
624 // This scenario is possible if __kmpc_end() has not been executed.
625 // It looks like it's not a corner case, but common cases:
626 // - the main function was compiled by an alternative compiler;
627 // - the main function was compiled by icl but without /Qopenmp (application with plugins);
628 // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
629 // - alive foreign thread prevented __kmpc_end from doing cleanup.
630
631 // This is a hack to work around the problem.
632 // TODO: !!! to figure out something better.
633 __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
634 }
635
636 __kmp_internal_end_library( __kmp_gtid_get_specific() );
637
638 return TRUE;
639
640 case DLL_THREAD_ATTACH:
641 KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));
642
643 /* if we wanted to register new siblings all the time here call
644 * __kmp_get_gtid(); */
645 return TRUE;
646
647 case DLL_THREAD_DETACH:
648 KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
649 __kmp_gtid_get_specific() ));
650
651 __kmp_internal_end_thread( __kmp_gtid_get_specific() );
652 return TRUE;
653 }
654
655 return TRUE;
656}
657
658# endif /* KMP_OS_WINDOWS */
Jonathan Peyton99016992015-05-26 17:32:53 +0000659#endif /* KMP_DYNAMIC_LIB */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000660
661
662/* ------------------------------------------------------------------------ */
663
664/* Change the library type to "status" and return the old type */
665/* called from within initialization routines where __kmp_initz_lock is held */
666int
667__kmp_change_library( int status )
668{
669 int old_status;
670
671 old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count)
672
673 if (status) {
674 __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
675 }
676 else {
677 __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
678 }
679
680 return old_status; // return previous setting of whether KMP_LIBRARY=throughput
681}
682
683/* ------------------------------------------------------------------------ */
684/* ------------------------------------------------------------------------ */
685
686/* __kmp_parallel_deo --
687 * Wait until it's our turn.
688 */
689void
690__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
691{
692 int gtid = *gtid_ref;
693#ifdef BUILD_PARALLEL_ORDERED
694 kmp_team_t *team = __kmp_team_from_gtid( gtid );
695#endif /* BUILD_PARALLEL_ORDERED */
696
697 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000698 if( __kmp_threads[gtid]->th.th_root->r.r_active )
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000699#if KMP_USE_DYNAMIC_LOCK
700 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
701#else
Jim Cownie5e8470a2013-09-27 10:38:44 +0000702 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000703#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000704 }
705#ifdef BUILD_PARALLEL_ORDERED
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000706 if( !team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000707 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000708 KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000709 KMP_MB();
710 }
711#endif /* BUILD_PARALLEL_ORDERED */
712}
713
714/* __kmp_parallel_dxo --
715 * Signal the next task.
716 */
717
718void
719__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
720{
721 int gtid = *gtid_ref;
722#ifdef BUILD_PARALLEL_ORDERED
723 int tid = __kmp_tid_from_gtid( gtid );
724 kmp_team_t *team = __kmp_team_from_gtid( gtid );
725#endif /* BUILD_PARALLEL_ORDERED */
726
727 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000728 if( __kmp_threads[gtid]->th.th_root->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000729 __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
730 }
731#ifdef BUILD_PARALLEL_ORDERED
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000732 if ( ! team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000733 KMP_MB(); /* Flush all pending memory write invalidates. */
734
735 /* use the tid of the next thread in this team */
736 /* TODO repleace with general release procedure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000737 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000738
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000739#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000740 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000741 ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
742 /* accept blame for "ordered" waiting */
743 kmp_info_t *this_thread = __kmp_threads[gtid];
744 ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
745 this_thread->th.ompt_thread_info.wait_id);
746 }
747#endif
748
Jim Cownie5e8470a2013-09-27 10:38:44 +0000749 KMP_MB(); /* Flush all pending memory write invalidates. */
750 }
751#endif /* BUILD_PARALLEL_ORDERED */
752}
753
754/* ------------------------------------------------------------------------ */
755/* ------------------------------------------------------------------------ */
756
757/* ------------------------------------------------------------------------ */
758/* ------------------------------------------------------------------------ */
759
760/* The BARRIER for a SINGLE process section is always explicit */
761
762int
763__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
764{
765 int status;
766 kmp_info_t *th;
767 kmp_team_t *team;
768
769 if( ! TCR_4(__kmp_init_parallel) )
770 __kmp_parallel_initialize();
771
772 th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000773 team = th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000774 status = 0;
775
776 th->th.th_ident = id_ref;
777
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000778 if ( team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000779 status = 1;
780 } else {
781 kmp_int32 old_this = th->th.th_local.this_construct;
782
783 ++th->th.th_local.this_construct;
784 /* try to set team count to thread count--success means thread got the
785 single block
786 */
787 /* TODO: Should this be acquire or release? */
Jonathan Peytonc1666962016-07-01 17:37:49 +0000788 if (team->t.t_construct == old_this) {
789 status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
790 th->th.th_local.this_construct);
791 }
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000792#if USE_ITT_BUILD
793 if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
794#if OMP_40_ENABLED
795 th->th.th_teams_microtask == NULL &&
796#endif
797 team->t.t_active_level == 1 )
798 { // Only report metadata by master of active team at level 1
799 __kmp_itt_metadata_single( id_ref );
800 }
801#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000802 }
803
804 if( __kmp_env_consistency_check ) {
805 if (status && push_ws) {
806 __kmp_push_workshare( gtid, ct_psingle, id_ref );
807 } else {
808 __kmp_check_workshare( gtid, ct_psingle, id_ref );
809 }
810 }
811#if USE_ITT_BUILD
812 if ( status ) {
813 __kmp_itt_single_start( gtid );
814 }
815#endif /* USE_ITT_BUILD */
816 return status;
817}
818
819void
820__kmp_exit_single( int gtid )
821{
822#if USE_ITT_BUILD
823 __kmp_itt_single_end( gtid );
824#endif /* USE_ITT_BUILD */
825 if( __kmp_env_consistency_check )
826 __kmp_pop_workshare( gtid, ct_psingle, NULL );
827}
828
829
Jim Cownie5e8470a2013-09-27 10:38:44 +0000830/*
831 * determine if we can go parallel or must use a serialized parallel region and
832 * how many threads we can use
833 * set_nproc is the number of threads requested for the team
834 * returns 0 if we should serialize or only use one thread,
835 * otherwise the number of threads to use
836 * The forkjoin lock is held by the caller.
837 */
838static int
839__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
840 int master_tid, int set_nthreads
841#if OMP_40_ENABLED
842 , int enter_teams
843#endif /* OMP_40_ENABLED */
844)
845{
846 int capacity;
847 int new_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000848 KMP_DEBUG_ASSERT( __kmp_init_serial );
849 KMP_DEBUG_ASSERT( root && parent_team );
850
851 //
Jim Cownie5e8470a2013-09-27 10:38:44 +0000852 // If dyn-var is set, dynamically adjust the number of desired threads,
853 // according to the method specified by dynamic_mode.
854 //
855 new_nthreads = set_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000856 if ( ! get__dynamic_2( parent_team, master_tid ) ) {
857 ;
858 }
859#ifdef USE_LOAD_BALANCE
860 else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
861 new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
862 if ( new_nthreads == 1 ) {
863 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
864 master_tid ));
865 return 1;
866 }
867 if ( new_nthreads < set_nthreads ) {
868 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
869 master_tid, new_nthreads ));
870 }
871 }
872#endif /* USE_LOAD_BALANCE */
873 else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
874 new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
875 : root->r.r_hot_team->t.t_nproc);
876 if ( new_nthreads <= 1 ) {
877 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
878 master_tid ));
879 return 1;
880 }
881 if ( new_nthreads < set_nthreads ) {
882 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
883 master_tid, new_nthreads ));
884 }
885 else {
886 new_nthreads = set_nthreads;
887 }
888 }
889 else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
890 if ( set_nthreads > 2 ) {
891 new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
892 new_nthreads = ( new_nthreads % set_nthreads ) + 1;
893 if ( new_nthreads == 1 ) {
894 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
895 master_tid ));
896 return 1;
897 }
898 if ( new_nthreads < set_nthreads ) {
899 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
900 master_tid, new_nthreads ));
901 }
902 }
903 }
904 else {
905 KMP_ASSERT( 0 );
906 }
907
908 //
909 // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
910 //
911 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
912 root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
913 int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
914 root->r.r_hot_team->t.t_nproc );
915 if ( tl_nthreads <= 0 ) {
916 tl_nthreads = 1;
917 }
918
919 //
920 // If dyn-var is false, emit a 1-time warning.
921 //
922 if ( ! get__dynamic_2( parent_team, master_tid )
923 && ( ! __kmp_reserve_warn ) ) {
924 __kmp_reserve_warn = 1;
925 __kmp_msg(
926 kmp_ms_warning,
927 KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
928 KMP_HNT( Unset_ALL_THREADS ),
929 __kmp_msg_null
930 );
931 }
932 if ( tl_nthreads == 1 ) {
933 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
934 master_tid ));
935 return 1;
936 }
937 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
938 master_tid, tl_nthreads ));
939 new_nthreads = tl_nthreads;
940 }
941
Jim Cownie5e8470a2013-09-27 10:38:44 +0000942 //
943 // Check if the threads array is large enough, or needs expanding.
944 //
945 // See comment in __kmp_register_root() about the adjustment if
946 // __kmp_threads[0] == NULL.
947 //
948 capacity = __kmp_threads_capacity;
949 if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
950 --capacity;
951 }
952 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
953 root->r.r_hot_team->t.t_nproc ) > capacity ) {
954 //
955 // Expand the threads array.
956 //
957 int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
958 root->r.r_hot_team->t.t_nproc ) - capacity;
959 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
960 if ( slotsAdded < slotsRequired ) {
961 //
962 // The threads array was not expanded enough.
963 //
964 new_nthreads -= ( slotsRequired - slotsAdded );
965 KMP_ASSERT( new_nthreads >= 1 );
966
967 //
968 // If dyn-var is false, emit a 1-time warning.
969 //
970 if ( ! get__dynamic_2( parent_team, master_tid )
971 && ( ! __kmp_reserve_warn ) ) {
972 __kmp_reserve_warn = 1;
973 if ( __kmp_tp_cached ) {
974 __kmp_msg(
975 kmp_ms_warning,
976 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
977 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
978 KMP_HNT( PossibleSystemLimitOnThreads ),
979 __kmp_msg_null
980 );
981 }
982 else {
983 __kmp_msg(
984 kmp_ms_warning,
985 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
986 KMP_HNT( SystemLimitOnThreads ),
987 __kmp_msg_null
988 );
989 }
990 }
991 }
992 }
993
994 if ( new_nthreads == 1 ) {
995 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
996 __kmp_get_gtid(), set_nthreads ) );
997 return 1;
998 }
999
1000 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
1001 __kmp_get_gtid(), new_nthreads, set_nthreads ));
1002 return new_nthreads;
1003}
1004
1005/* ------------------------------------------------------------------------ */
1006/* ------------------------------------------------------------------------ */
1007
1008/* allocate threads from the thread pool and assign them to the new team */
1009/* we are assured that there are enough threads available, because we
1010 * checked on that earlier within critical section forkjoin */
1011
1012static void
1013__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
1014 kmp_info_t *master_th, int master_gtid )
1015{
1016 int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001017 int use_hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001018
1019 KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
1020 KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
1021 KMP_MB();
1022
1023 /* first, let's setup the master thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001024 master_th->th.th_info.ds.ds_tid = 0;
1025 master_th->th.th_team = team;
1026 master_th->th.th_team_nproc = team->t.t_nproc;
1027 master_th->th.th_team_master = master_th;
1028 master_th->th.th_team_serialized = FALSE;
1029 master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001030
1031 /* make sure we are not the optimized hot team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001032#if KMP_NESTED_HOT_TEAMS
1033 use_hot_team = 0;
1034 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
1035 if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
1036 int level = team->t.t_active_level - 1; // index in array of hot teams
1037 if( master_th->th.th_teams_microtask ) { // are we inside the teams?
1038 if( master_th->th.th_teams_size.nteams > 1 ) {
1039 ++level; // level was not increased in teams construct for team_of_masters
1040 }
1041 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1042 master_th->th.th_teams_level == team->t.t_level ) {
1043 ++level; // level was not increased in teams construct for team_of_workers before the parallel
1044 } // team->t.t_level will be increased inside parallel
1045 }
1046 if( level < __kmp_hot_teams_max_level ) {
1047 if( hot_teams[level].hot_team ) {
1048 // hot team has already been allocated for given level
1049 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
1050 use_hot_team = 1; // the team is ready to use
1051 } else {
1052 use_hot_team = 0; // AC: threads are not allocated yet
1053 hot_teams[level].hot_team = team; // remember new hot team
1054 hot_teams[level].hot_team_nth = team->t.t_nproc;
1055 }
1056 } else {
1057 use_hot_team = 0;
1058 }
1059 }
1060#else
1061 use_hot_team = team == root->r.r_hot_team;
1062#endif
1063 if ( !use_hot_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001064
1065 /* install the master thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001066 team->t.t_threads[ 0 ] = master_th;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001067 __kmp_initialize_info( master_th, team, 0, master_gtid );
1068
1069 /* now, install the worker threads */
1070 for ( i=1 ; i < team->t.t_nproc ; i++ ) {
1071
1072 /* fork or reallocate a new thread and install it in team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001073 kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
1074 team->t.t_threads[ i ] = thr;
1075 KMP_DEBUG_ASSERT( thr );
1076 KMP_DEBUG_ASSERT( thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001077 /* align team and thread arrived states */
Jonathan Peytond26e2132015-09-10 18:44:30 +00001078 KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001079 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
1080 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
1081 team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
1082 team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001083#if OMP_40_ENABLED
1084 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1085 thr->th.th_teams_level = master_th->th.th_teams_level;
1086 thr->th.th_teams_size = master_th->th.th_teams_size;
1087#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001088 { // Initialize threads' barrier data.
1089 int b;
1090 kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
1091 for ( b = 0; b < bs_last_barrier; ++ b ) {
1092 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001093 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001094#if USE_DEBUGGER
1095 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
1096#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001097 }; // for b
1098 }
1099 }
1100
Alp Toker98758b02014-03-02 04:12:06 +00001101#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001102 __kmp_partition_places( team );
1103#endif
1104
1105 }
1106
1107 KMP_MB();
1108}
1109
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001110#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1111//
1112// Propagate any changes to the floating point control registers out to the team
1113// We try to avoid unnecessary writes to the relevant cache line in the team structure,
1114// so we don't make changes unless they are needed.
1115//
1116inline static void
1117propagateFPControl(kmp_team_t * team)
1118{
1119 if ( __kmp_inherit_fp_control ) {
1120 kmp_int16 x87_fpu_control_word;
1121 kmp_uint32 mxcsr;
1122
1123 // Get master values of FPU control flags (both X87 and vector)
1124 __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
1125 __kmp_store_mxcsr( &mxcsr );
1126 mxcsr &= KMP_X86_MXCSR_MASK;
1127
1128 // There is no point looking at t_fp_control_saved here.
1129 // If it is TRUE, we still have to update the values if they are different from those we now have.
1130 // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
1131 // that the values in the team are the same as those we have.
1132 // So, this code achieves what we need whether or not t_fp_control_saved is true.
1133 // By checking whether the value needs updating we avoid unnecessary writes that would put the
1134 // cache-line into a written state, causing all threads in the team to have to read it again.
Jonathan Peyton6b560f02016-07-01 17:54:32 +00001135 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1136 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001137 // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
1138 // So we must ensure it is correct.
Jonathan Peyton6b560f02016-07-01 17:54:32 +00001139 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001140 }
1141 else {
1142 // Similarly here. Don't write to this cache-line in the team structure unless we have to.
Jonathan Peyton6b560f02016-07-01 17:54:32 +00001143 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001144 }
1145}
1146
1147// Do the opposite, setting the hardware registers to the updated values from the team.
1148inline static void
1149updateHWFPControl(kmp_team_t * team)
1150{
1151 if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
1152 //
1153 // Only reset the fp control regs if they have been changed in the team.
1154 // the parallel region that we are exiting.
1155 //
1156 kmp_int16 x87_fpu_control_word;
1157 kmp_uint32 mxcsr;
1158 __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
1159 __kmp_store_mxcsr( &mxcsr );
1160 mxcsr &= KMP_X86_MXCSR_MASK;
1161
1162 if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
1163 __kmp_clear_x87_fpu_status_word();
1164 __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
1165 }
1166
1167 if ( team->t.t_mxcsr != mxcsr ) {
1168 __kmp_load_mxcsr( &team->t.t_mxcsr );
1169 }
1170 }
1171}
1172#else
1173# define propagateFPControl(x) ((void)0)
1174# define updateHWFPControl(x) ((void)0)
1175#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1176
Jim Cownie5e8470a2013-09-27 10:38:44 +00001177static void
1178__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
1179
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001180/*
1181 * Run a parallel region that has been serialized, so runs only in a team of the single master thread.
1182 */
1183void
1184__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
1185{
1186 kmp_info_t *this_thr;
1187 kmp_team_t *serial_team;
1188
1189 KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );
1190
1191 /* Skip all this code for autopar serialized loops since it results in
1192 unacceptable overhead */
1193 if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
1194 return;
1195
1196 if( ! TCR_4( __kmp_init_parallel ) )
1197 __kmp_parallel_initialize();
1198
1199 this_thr = __kmp_threads[ global_tid ];
1200 serial_team = this_thr->th.th_serial_team;
1201
1202 /* utilize the serialized team held by this thread */
1203 KMP_DEBUG_ASSERT( serial_team );
1204 KMP_MB();
1205
1206 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001207 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1208 KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001209 KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
1210 global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
1211 this_thr->th.th_task_team = NULL;
1212 }
1213
1214#if OMP_40_ENABLED
1215 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1216 if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1217 proc_bind = proc_bind_false;
1218 }
1219 else if ( proc_bind == proc_bind_default ) {
1220 //
1221 // No proc_bind clause was specified, so use the current value
1222 // of proc-bind-var for this parallel region.
1223 //
1224 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1225 }
1226 //
1227 // Reset for next parallel region
1228 //
1229 this_thr->th.th_set_proc_bind = proc_bind_default;
1230#endif /* OMP_40_ENABLED */
1231
1232 if( this_thr->th.th_team != serial_team ) {
1233 // Nested level will be an index in the nested nthreads array
1234 int level = this_thr->th.th_team->t.t_level;
1235
1236 if( serial_team->t.t_serialized ) {
1237 /* this serial team was already used
1238 * TODO increase performance by making this locks more specific */
1239 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001240
1241 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1242
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001243#if OMPT_SUPPORT
1244 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1245#endif
1246
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001247 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001248#if OMPT_SUPPORT
1249 ompt_parallel_id,
1250#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001251#if OMP_40_ENABLED
1252 proc_bind,
1253#endif
1254 & this_thr->th.th_current_task->td_icvs,
1255 0 USE_NESTED_HOT_ARG(NULL) );
1256 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1257 KMP_ASSERT( new_team );
1258
1259 /* setup new serialized team and install it */
1260 new_team->t.t_threads[0] = this_thr;
1261 new_team->t.t_parent = this_thr->th.th_team;
1262 serial_team = new_team;
1263 this_thr->th.th_serial_team = serial_team;
1264
1265 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1266 global_tid, serial_team ) );
1267
1268
1269 /* TODO the above breaks the requirement that if we run out of
1270 * resources, then we can still guarantee that serialized teams
1271 * are ok, since we may need to allocate a new one */
1272 } else {
1273 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1274 global_tid, serial_team ) );
1275 }
1276
1277 /* we have to initialize this serial team */
1278 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1279 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1280 KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
1281 serial_team->t.t_ident = loc;
1282 serial_team->t.t_serialized = 1;
1283 serial_team->t.t_nproc = 1;
1284 serial_team->t.t_parent = this_thr->th.th_team;
1285 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
1286 this_thr->th.th_team = serial_team;
1287 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1288
1289 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n",
1290 global_tid, this_thr->th.th_current_task ) );
1291 KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
1292 this_thr->th.th_current_task->td_flags.executing = 0;
1293
1294 __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
1295
1296 /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
1297 each serialized task represented by team->t.t_serialized? */
1298 copy_icvs(
1299 & this_thr->th.th_current_task->td_icvs,
1300 & this_thr->th.th_current_task->td_parent->td_icvs );
1301
1302 // Thread value exists in the nested nthreads array for the next nested level
1303 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1304 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1305 }
1306
1307#if OMP_40_ENABLED
1308 if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
1309 this_thr->th.th_current_task->td_icvs.proc_bind
1310 = __kmp_nested_proc_bind.bind_types[ level + 1 ];
1311 }
1312#endif /* OMP_40_ENABLED */
1313
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001314#if USE_DEBUGGER
1315 serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
1316#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001317 this_thr->th.th_info.ds.ds_tid = 0;
1318
1319 /* set thread cache values */
1320 this_thr->th.th_team_nproc = 1;
1321 this_thr->th.th_team_master = this_thr;
1322 this_thr->th.th_team_serialized = 1;
1323
1324 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1325 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1326
1327 propagateFPControl (serial_team);
1328
1329 /* check if we need to allocate dispatch buffers stack */
1330 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1331 if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
1332 serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
1333 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1334 }
1335 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1336
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001337#if OMPT_SUPPORT
1338 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1339 __ompt_team_assign_id(serial_team, ompt_parallel_id);
1340#endif
1341
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001342 KMP_MB();
1343
1344 } else {
1345 /* this serialized team is already being used,
1346 * that's fine, just add another nested level */
1347 KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
1348 KMP_DEBUG_ASSERT( serial_team->t.t_threads );
1349 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
1350 ++ serial_team->t.t_serialized;
1351 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1352
1353 // Nested level will be an index in the nested nthreads array
1354 int level = this_thr->th.th_team->t.t_level;
1355 // Thread value exists in the nested nthreads array for the next nested level
1356 if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
1357 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
1358 }
1359 serial_team->t.t_level++;
1360 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
1361 global_tid, serial_team, serial_team->t.t_level ) );
1362
1363 /* allocate/push dispatch buffers stack */
1364 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1365 {
1366 dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
1367 __kmp_allocate( sizeof( dispatch_private_info_t ) );
1368 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1369 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1370 }
1371 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1372
1373 KMP_MB();
1374 }
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001375#if OMP_40_ENABLED
1376 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1377#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001378
1379 if ( __kmp_env_consistency_check )
1380 __kmp_push_parallel( global_tid, NULL );
1381
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001382}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001383
Jim Cownie5e8470a2013-09-27 10:38:44 +00001384/* most of the work for a fork */
1385/* return true if we really went parallel, false if serialized */
1386int
1387__kmp_fork_call(
1388 ident_t * loc,
1389 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001390 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001391 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001392#if OMPT_SUPPORT
1393 void *unwrapped_task,
1394#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001395 microtask_t microtask,
1396 launch_t invoker,
1397/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001398#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001399 va_list * ap
1400#else
1401 va_list ap
1402#endif
1403 )
1404{
1405 void **argv;
1406 int i;
1407 int master_tid;
1408 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001409 kmp_team_t *team;
1410 kmp_team_t *parent_team;
1411 kmp_info_t *master_th;
1412 kmp_root_t *root;
1413 int nthreads;
1414 int master_active;
1415 int master_set_numthreads;
1416 int level;
1417#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001418 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001419 int teams_level;
1420#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001421#if KMP_NESTED_HOT_TEAMS
1422 kmp_hot_team_ptr_t **p_hot_teams;
1423#endif
1424 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001425 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001426 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001427
1428 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001429 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
1430 /* Some systems prefer the stack for the root thread(s) to start with */
1431 /* some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001432 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001433 /* These 2 lines below are so this does not get optimized out */
1434 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1435 __kmp_stkpadding += (short)((kmp_int64)dummy);
1436 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001437
1438 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001439 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001440 if( ! TCR_4(__kmp_init_parallel) )
1441 __kmp_parallel_initialize();
1442
1443 /* setup current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001444 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1445 parent_team = master_th->th.th_team;
1446 master_tid = master_th->th.th_info.ds.ds_tid;
1447 master_this_cons = master_th->th.th_local.this_construct;
1448 root = master_th->th.th_root;
1449 master_active = root->r.r_active;
1450 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001451
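    // When OMPT is enabled, capture the new parallel id plus the encountering task's id
    // and frame up front; the parallel_begin callback issued below reports them.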
1452#if OMPT_SUPPORT
1453 ompt_parallel_id_t ompt_parallel_id;
1454 ompt_task_id_t ompt_task_id;
1455 ompt_frame_t *ompt_frame;
1456 ompt_task_id_t my_task_id;
1457 ompt_parallel_id_t my_parallel_id;
1458
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001459 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001460 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1461 ompt_task_id = __ompt_get_task_id_internal(0);
1462 ompt_frame = __ompt_get_task_frame_internal(0);
1463 }
1464#endif
1465
Jim Cownie5e8470a2013-09-27 10:38:44 +00001466 // Nested level will be an index in the nested nthreads array
1467 level = parent_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001468        active_level = parent_team->t.t_active_level; // used to launch non-serial teams even if nesting is not allowed
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001469#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001470 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1471#endif
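    // Lazily allocate this thread's array of nested hot teams (one entry per nesting
    // level, up to __kmp_hot_teams_max_level); level 0 starts out as the root's hot team.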
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001472#if KMP_NESTED_HOT_TEAMS
1473 p_hot_teams = &master_th->th.th_hot_teams;
1474 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1475 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1476 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1477 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
 1478            (*p_hot_teams)[0].hot_team_nth = 1; // value is either the actual count or not needed (when active_level > 0)
1479 }
1480#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001481
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001482#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001483 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001484 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1485 int team_size = master_set_numthreads;
1486
1487 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1488 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001489 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001490 }
1491#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492
Jim Cownie5e8470a2013-09-27 10:38:44 +00001493 master_th->th.th_ident = loc;
1494
1495#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001496 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001497 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
 1498            // AC: This is the start of a parallel region nested inside a teams construct.
 1499            // The team is actual (hot); all workers are ready at the fork barrier.
 1500            // No lock is needed to do the minimal team setup here and then release the workers.
1501 parent_team->t.t_ident = loc;
Jonathan Peyton7cf08d42016-06-16 18:47:38 +00001502 __kmp_alloc_argv_entries( argc, parent_team, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001503 parent_team->t.t_argc = argc;
1504 argv = (void**)parent_team->t.t_argv;
1505 for( i=argc-1; i >= 0; --i )
1506/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001507#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001508 *argv++ = va_arg( *ap, void * );
1509#else
1510 *argv++ = va_arg( ap, void * );
1511#endif
 1512            /* Increment our nested depth levels, but do not increase the serialization */
1513 if ( parent_team == master_th->th.th_serial_team ) {
1514 // AC: we are in serialized parallel
1515 __kmpc_serialized_parallel(loc, gtid);
1516 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
 1517                parent_team->t.t_serialized--; // AC: need this so that enquiry functions
 1518                                               // work correctly; will restore at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001519
1520#if OMPT_SUPPORT
1521 void *dummy;
1522 void **exit_runtime_p;
1523
1524 ompt_lw_taskteam_t lw_taskteam;
1525
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001526 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001527 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1528 unwrapped_task, ompt_parallel_id);
1529 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1530 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1531
1532 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1533
1534#if OMPT_TRACE
1535 /* OMPT implicit task begin */
1536 my_task_id = lw_taskteam.ompt_task_info.task_id;
1537 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001538 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001539 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1540 my_parallel_id, my_task_id);
1541 }
1542#endif
1543
1544 /* OMPT state */
1545 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1546 } else {
1547 exit_runtime_p = &dummy;
1548 }
1549#endif
1550
Jonathan Peyton45be4502015-08-11 21:36:41 +00001551 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001552 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1553 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001554 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001555#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001556 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001557#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001558 );
1559 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001560
1561#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001562 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001563 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001564#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001565 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001566
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001567 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001568 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1569 ompt_parallel_id, ompt_task_id);
1570 }
1571
1572 __ompt_lw_taskteam_unlink(master_th);
 1573                    // clear (reset) the task id only after unlinking the task
1574 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1575#endif
1576
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001577 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001578 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001579 ompt_parallel_id, ompt_task_id,
1580 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001581 }
1582 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1583 }
1584#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001585 return TRUE;
1586 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001587
Jim Cownie5e8470a2013-09-27 10:38:44 +00001588 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001589#if OMPT_SUPPORT
1590 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1591#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001592 parent_team->t.t_invoke = invoker;
1593 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1594 parent_team->t.t_active_level ++;
1595 parent_team->t.t_level ++;
1596
1597 /* Change number of threads in the team if requested */
1598 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001599 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001600            // AC: can only reduce the number of threads dynamically, cannot increase it
1601 kmp_info_t **other_threads = parent_team->t.t_threads;
1602 parent_team->t.t_nproc = master_set_numthreads;
1603 for ( i = 0; i < master_set_numthreads; ++i ) {
1604 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1605 }
1606 // Keep extra threads hot in the team for possible next parallels
1607 }
1608 master_th->th.th_set_nproc = 0;
1609 }
1610
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001611#if USE_DEBUGGER
1612 if ( __kmp_debugging ) { // Let debugger override number of threads.
1613 int nth = __kmp_omp_num_threads( loc );
1614 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1615 master_set_numthreads = nth;
1616 }; // if
1617 }; // if
1618#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001619
1620 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1621 __kmp_internal_fork( loc, gtid, parent_team );
1622 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1623
1624 /* Invoke microtask for MASTER thread */
1625 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1626 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1627
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001628 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001629 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1630 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001631 if (! parent_team->t.t_invoke( gtid )) {
1632 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1633 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001634 }
1635 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1636 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1637 KMP_MB(); /* Flush all pending memory write invalidates. */
1638
1639 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1640
1641 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001642 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643#endif /* OMP_40_ENABLED */
1644
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001645#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001646 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001647 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001648 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001649#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001650
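    // Decide the thread count for the new region: serialize once max-active-levels is
    // reached; otherwise start from the num_threads clause (or the nproc ICV), possibly
    // serialize when nesting is disabled, and let __kmp_reserve_threads trim the request
    // under the forkjoin lock.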
Jim Cownie5e8470a2013-09-27 10:38:44 +00001651 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
1652 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001653 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001654#if OMP_40_ENABLED
1655 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1656#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001657 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001658 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001659
 1660        // Check whether we need to take the forkjoin lock (no need for a serialized parallel outside of a teams construct).
 1661        // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
1662 if (nthreads > 1) {
1663 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1664#if OMP_40_ENABLED
1665 && !enter_teams
1666#endif /* OMP_40_ENABLED */
1667 ) ) || ( __kmp_library == library_serial ) ) {
1668 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1669 gtid, nthreads ));
1670 nthreads = 1;
1671 }
1672 }
1673 if ( nthreads > 1 ) {
1674 /* determine how many new threads we can use */
1675 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1676
1677 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001678#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001679/* AC: If we execute teams from a parallel region (on the host), then teams should be created,
 1680 but each can only have 1 thread if nesting is disabled. If teams is called from a serial region,
 1681 then the teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001682 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001683#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001684 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001685 if ( nthreads == 1 ) {
1686 // Free lock for single thread execution here;
1687 // for multi-thread execution it will be freed later
 1688                // after the team of threads has been created and initialized
1689 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1690 }
1691 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001692 }
1693 KMP_DEBUG_ASSERT( nthreads > 0 );
1694
1695 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001696 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001697
Jim Cownie5e8470a2013-09-27 10:38:44 +00001698 /* create a serialized parallel region? */
1699 if ( nthreads == 1 ) {
1700 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001701#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001702 void * args[ argc ];
1703#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001704 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001705#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001706
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1708
1709 __kmpc_serialized_parallel(loc, gtid);
1710
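        /* Serialized execution from here on: on the Intel entry point the region body (or
           the teams invoker) runs inline on this thread below; on the GNU entry point we
           return FALSE and the caller runs the body itself. */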
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001711 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001712 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001713 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001714#if OMP_40_ENABLED
1715 if ( !ap ) {
1716 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001717 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001718 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001719
1720#if OMPT_SUPPORT
1721 void *dummy;
1722 void **exit_runtime_p;
1723
1724 ompt_lw_taskteam_t lw_taskteam;
1725
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001726 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001727 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1728 unwrapped_task, ompt_parallel_id);
1729 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1730 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1731
1732 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1733
1734#if OMPT_TRACE
1735 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001736 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001737 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1738 ompt_parallel_id, my_task_id);
1739 }
1740#endif
1741
1742 /* OMPT state */
1743 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1744 } else {
1745 exit_runtime_p = &dummy;
1746 }
1747#endif
1748
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001749 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001750 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1751 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001752 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1753#if OMPT_SUPPORT
1754 , exit_runtime_p
1755#endif
1756 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001757 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001758
1759#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001760 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001761 if (ompt_enabled) {
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001762 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001763
1764#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001765 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001766 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1767 ompt_parallel_id, ompt_task_id);
1768 }
1769#endif
1770
1771 __ompt_lw_taskteam_unlink(master_th);
 1772                    // clear (reset) the task id only after unlinking the task
1773 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1774
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001775 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001776 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001777 ompt_parallel_id, ompt_task_id,
1778 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001779 }
1780 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1781 }
1782#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001783 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
1784 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1785 team = master_th->th.th_team;
1786 //team->t.t_pkfn = microtask;
1787 team->t.t_invoke = invoker;
1788 __kmp_alloc_argv_entries( argc, team, TRUE );
1789 team->t.t_argc = argc;
1790 argv = (void**) team->t.t_argv;
1791 if ( ap ) {
1792 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001793// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001794# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001795 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001796# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001797 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001798# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001799 } else {
1800 for( i=0; i < argc; ++i )
1801 // Get args from parent team for teams construct
1802 argv[i] = parent_team->t.t_argv[i];
1803 }
1804 // AC: revert change made in __kmpc_serialized_parallel()
1805 // because initial code in teams should have level=0
1806 team->t.t_level--;
1807 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001808 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001809 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1810 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001811 invoker(gtid);
1812 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001813 } else {
1814#endif /* OMP_40_ENABLED */
1815 argv = args;
1816 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001817// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001818#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001819 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001820#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001821 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001822#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001823 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001824
1825#if OMPT_SUPPORT
1826 void *dummy;
1827 void **exit_runtime_p;
1828
1829 ompt_lw_taskteam_t lw_taskteam;
1830
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001831 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001832 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1833 unwrapped_task, ompt_parallel_id);
1834 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1835 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1836
1837 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1838
1839#if OMPT_TRACE
1840 /* OMPT implicit task begin */
1841 my_task_id = lw_taskteam.ompt_task_info.task_id;
1842 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001843 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001844 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1845 my_parallel_id, my_task_id);
1846 }
1847#endif
1848
1849 /* OMPT state */
1850 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1851 } else {
1852 exit_runtime_p = &dummy;
1853 }
1854#endif
1855
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001856 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001857 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1858 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001859 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1860#if OMPT_SUPPORT
1861 , exit_runtime_p
1862#endif
1863 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001864 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001865
1866#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001867 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001868 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001869#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001870 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001871
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001872 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001873 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1874 my_parallel_id, my_task_id);
1875 }
1876#endif
1877
1878 __ompt_lw_taskteam_unlink(master_th);
 1879                // clear (reset) the task id only after unlinking the task
1880 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1881
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001882 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001883 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001884 ompt_parallel_id, ompt_task_id,
1885 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001886 }
1887 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1888 }
1889#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001890#if OMP_40_ENABLED
1891 }
1892#endif /* OMP_40_ENABLED */
1893 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001894 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001895#if OMPT_SUPPORT
1896 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1897 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1898 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1899 unwrapped_task, ompt_parallel_id);
1900
1901 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001902 lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001903 __ompt_lw_taskteam_link(lwt, master_th);
1904#endif
1905
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001906 // we were called from GNU native code
1907 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1908 return FALSE;
1909 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001910 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001911 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001912 }
1913
Jim Cownie5e8470a2013-09-27 10:38:44 +00001914
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001915 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001916 KMP_MB();
1917 return FALSE;
1918 }
1919
Jim Cownie5e8470a2013-09-27 10:38:44 +00001920 // GEH: only modify the executing flag in the case when not serialized
1921 // serialized case is handled in kmpc_serialized_parallel
1922 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001923 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1924 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001925 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1926 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1927 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001928
1929#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001930 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001931#endif /* OMP_40_ENABLED */
1932 {
1933 /* Increment our nested depth level */
1934 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1935 }
1936
Jim Cownie5e8470a2013-09-27 10:38:44 +00001937 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001939 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1940 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001941 }
1942 else {
1943 nthreads_icv = 0; // don't update
1944 }
1945
1946#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001947 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001949 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001950 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1951 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001952 }
1953 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001954 if (proc_bind == proc_bind_default) {
1955 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1956 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001957 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001958 /* else: The proc_bind policy was specified explicitly on parallel clause. This
1959 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001960 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001961 if ((level+1 < __kmp_nested_proc_bind.used)
1962 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1963 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 }
1965 }
1966
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001968 master_th->th.th_set_proc_bind = proc_bind_default;
1969#endif /* OMP_40_ENABLED */
1970
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001971 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001972#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001973 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001974#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001975 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001976 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001977 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001978 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001979 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001980 new_icvs.nproc = nthreads_icv;
1981 }
1982
1983#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001984 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985 new_icvs.proc_bind = proc_bind_icv;
1986 }
1987#endif /* OMP_40_ENABLED */
1988
1989 /* allocate a new parallel team */
1990 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1991 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001992#if OMPT_SUPPORT
1993 ompt_parallel_id,
1994#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001996 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001998 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1999 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002000 /* allocate a new parallel team */
2001 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
2002 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002003#if OMPT_SUPPORT
2004 ompt_parallel_id,
2005#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002007 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002009 &master_th->th.th_current_task->td_icvs, argc
2010 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002012 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002013
2014 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002015 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2016 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2017 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2018 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2019 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002020#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002021 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002022#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002023 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002024 // TODO: parent_team->t.t_level == INT_MAX ???
2025#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002026 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002028 int new_level = parent_team->t.t_level + 1;
2029 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2030 new_level = parent_team->t.t_active_level + 1;
2031 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002032#if OMP_40_ENABLED
2033 } else {
2034 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002035 int new_level = parent_team->t.t_level;
2036 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2037 new_level = parent_team->t.t_active_level;
2038 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002039 }
2040#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002041 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton6b560f02016-07-01 17:54:32 +00002042 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002043 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002044
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002045#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002046 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002047#endif
2048
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002049 // Update the floating point rounding in the team if required.
2050 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002051
2052 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002053        // Set master's task team to team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002054#if 0
2055 // Patch out an assertion that trips while the runtime seems to operate correctly.
2056 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002057 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002058#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002059 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002060 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002061 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002062
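        // Save the master's current task_state on its memo stack (doubling the stack if
        // it is full); the matching pop happens in __kmp_join_call when the region ends.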
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002063 if ( active_level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002064 // Take a memo of master's task_state
2065 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2066 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002067 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2068 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002069 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002070 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002071 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2072 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2073 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002074 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2075 new_stack[i] = 0;
2076 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002077 old_stack = master_th->th.th_task_state_memo_stack;
2078 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002079 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002080 __kmp_free(old_stack);
2081 }
2082 // Store master's task_state on stack
2083 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2084 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002085#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002086 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
Jonathan Peyton54127982015-11-04 21:37:48 +00002087 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2088 }
2089 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002090#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002091 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002092#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002093 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002094#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002095 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002096#if !KMP_NESTED_HOT_TEAMS
2097 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2098#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002099 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002100
2101 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2102 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2103 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2104 ( team->t.t_master_tid == 0 &&
2105 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2106 KMP_MB();
2107
2108 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002109 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002110#if OMP_40_ENABLED
2111 if ( ap ) {
2112#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002113 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002114// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002115#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002116 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002117#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002118 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002119#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002120 KMP_CHECK_UPDATE(*argv, new_argv);
2121 argv++;
2122 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002123#if OMP_40_ENABLED
2124 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002125 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002126 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002127 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2128 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002129 }
2130#endif /* OMP_40_ENABLED */
2131
2132 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002133 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002134 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2135 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002136
2137 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002138 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002139
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002140#if OMPT_SUPPORT
2141 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2142#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002143
2144 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2145
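    // ITT/VTune reporting for level-1 (non-teams) regions: depending on
    // __kmp_forkjoin_frames_mode, either record the frame-begin timestamp here (the
    // matching frame submit happens at join) or emit the region-forking notification;
    // only one of the two schemes is used.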
Jim Cownie5e8470a2013-09-27 10:38:44 +00002146#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002147 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002148# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002149 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002150# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002151 ) {
2152#if USE_ITT_NOTIFY
2153 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2154 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002155 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002156 kmp_uint64 tmp_time = 0;
2157 if ( __itt_get_timestamp_ptr )
2158 tmp_time = __itt_get_timestamp();
2159 // Internal fork - report frame begin
2160 master_th->th.th_frame_time = tmp_time;
2161 if ( __kmp_forkjoin_frames_mode == 3 )
2162 team->t.t_region_time = tmp_time;
2163 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2164#endif /* USE_ITT_NOTIFY */
2165 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2166 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2167 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002168 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2169 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002170 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002171#endif /* USE_ITT_BUILD */
2172
2173 /* now go on and do the work */
2174 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2175 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002176 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2177 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002178
2179#if USE_ITT_BUILD
2180 if ( __itt_stack_caller_create_ptr ) {
2181 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2182 }
2183#endif /* USE_ITT_BUILD */
2184
2185#if OMP_40_ENABLED
2186 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
2187#endif /* OMP_40_ENABLED */
2188 {
2189 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002190 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2191 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002192 }
2193
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002194 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002195 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2196 return TRUE;
2197 }
2198
2199 /* Invoke microtask for MASTER thread */
2200 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2201 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002202 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002203
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002204 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002205 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2206 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002207 if (! team->t.t_invoke( gtid )) {
2208 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2209 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002210 }
2211 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2212 gtid, team->t.t_id, team->t.t_pkfn ) );
2213 KMP_MB(); /* Flush all pending memory write invalidates. */
2214
2215 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2216
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002217#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002218 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002219 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2220 }
2221#endif
2222
Jim Cownie5e8470a2013-09-27 10:38:44 +00002223 return TRUE;
2224}
2225
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002226#if OMPT_SUPPORT
2227static inline void
2228__kmp_join_restore_state(
2229 kmp_info_t *thread,
2230 kmp_team_t *team)
2231{
2232 // restore state outside the region
2233 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2234 ompt_state_work_serial : ompt_state_work_parallel);
2235}
2236
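// Fires the OMPT parallel_end callback (when registered) for the joining thread, clears
// its reenter frame, and restores the thread state of the enclosing region.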
2237static inline void
2238__kmp_join_ompt(
2239 kmp_info_t *thread,
2240 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002241 ompt_parallel_id_t parallel_id,
2242 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002243{
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002244 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002245 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002246 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002247 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002248 }
2249
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002250 task_info->frame.reenter_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002251 __kmp_join_restore_state(thread,team);
2252}
2253#endif
2254
Jim Cownie5e8470a2013-09-27 10:38:44 +00002255void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002256__kmp_join_call(ident_t *loc, int gtid
2257#if OMPT_SUPPORT
2258 , enum fork_context_e fork_context
2259#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002260#if OMP_40_ENABLED
2261 , int exit_teams
2262#endif /* OMP_40_ENABLED */
2263)
2264{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00002265 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002266 kmp_team_t *team;
2267 kmp_team_t *parent_team;
2268 kmp_info_t *master_th;
2269 kmp_root_t *root;
2270 int master_active;
2271 int i;
2272
2273 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2274
2275 /* setup current data */
2276 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002277 root = master_th->th.th_root;
2278 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002279 parent_team = team->t.t_parent;
2280
2281 master_th->th.th_ident = loc;
2282
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002283#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002284 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002285 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2286 }
2287#endif
2288
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002289#if KMP_DEBUG
Andrey Churbanovcece72a2017-03-28 13:35:42 +00002290 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002291 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2292 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002293 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2294 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002295 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002296#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002297
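    // A serialized region needs no join barrier: just __kmpc_end_serialized_parallel,
    // plus some level/serialization bookkeeping when we are inside a teams construct.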
2298 if( team->t.t_serialized ) {
2299#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002300 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002301 // We are in teams construct
2302 int level = team->t.t_level;
2303 int tlevel = master_th->th.th_teams_level;
2304 if ( level == tlevel ) {
2305 // AC: we haven't incremented it earlier at start of teams construct,
2306 // so do it here - at the end of teams construct
2307 team->t.t_level++;
2308 } else if ( level == tlevel + 1 ) {
2309 // AC: we are exiting parallel inside teams, need to increment serialization
2310 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2311 team->t.t_serialized++;
2312 }
2313 }
2314#endif /* OMP_40_ENABLED */
2315 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002316
2317#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002318 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002319 __kmp_join_restore_state(master_th, parent_team);
2320 }
2321#endif
2322
Jim Cownie5e8470a2013-09-27 10:38:44 +00002323 return;
2324 }
2325
2326 master_active = team->t.t_master_active;
2327
2328#if OMP_40_ENABLED
2329 if (!exit_teams)
2330#endif /* OMP_40_ENABLED */
2331 {
2332 // AC: No barrier for internal teams at exit from teams construct.
 2333        // But there is a barrier for the external team (league).
2334 __kmp_internal_join( loc, gtid, team );
2335 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002336#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002337 else {
2338 master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
2339 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002340#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002341
Jim Cownie5e8470a2013-09-27 10:38:44 +00002342 KMP_MB();
2343
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002344#if OMPT_SUPPORT
2345 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2346#endif
2347
Jim Cownie5e8470a2013-09-27 10:38:44 +00002348#if USE_ITT_BUILD
2349 if ( __itt_stack_caller_create_ptr ) {
2350 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2351 }
2352
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002353 // Mark end of "parallel" region for VTune.
2354 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002355# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002356 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002357# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002358 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002359 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002360 // only one notification scheme (either "submit" or "forking/joined", not both)
2361 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2362 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2363 0, loc, master_th->th.th_team_nproc, 1 );
2364 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2365 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2366 __kmp_itt_region_joined( gtid );
2367 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002368#endif /* USE_ITT_BUILD */
2369
2370#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002371 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002372 !exit_teams &&
2373 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2374 team->t.t_level == master_th->th.th_teams_level + 1 ) {
 2375        // AC: We need to leave the team structure intact at the end
 2376        // of a parallel region inside the teams construct, so that the same
 2377        // (hot) team works at the next parallel region; only adjust nesting levels
2378
2379 /* Decrement our nested depth level */
2380 team->t.t_level --;
2381 team->t.t_active_level --;
2382 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2383
2384 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002385 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002386 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002387 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002388 kmp_info_t **other_threads = team->t.t_threads;
2389 team->t.t_nproc = new_num;
2390 for ( i = 0; i < old_num; ++i ) {
2391 other_threads[i]->th.th_team_nproc = new_num;
2392 }
 2393            // Adjust states of the unused threads of the team
2394 for ( i = old_num; i < new_num; ++i ) {
2395 // Re-initialize thread's barrier data.
2396 int b;
2397 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002398 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002399 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002400 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002401#if USE_DEBUGGER
2402 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2403#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002404 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002405 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2406 // Synchronize thread's task state
2407 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2408 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002409 }
2410 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002411
2412#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002413 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002414 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002415 }
2416#endif
2417
Jim Cownie5e8470a2013-09-27 10:38:44 +00002418 return;
2419 }
2420#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002421
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002422 /* do cleanup and restore the parent team */
2423 master_th->th.th_info .ds.ds_tid = team->t.t_master_tid;
2424 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2425
2426 master_th->th.th_dispatch =
2427 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002428
2429 /* jc: The following lock has instructions with REL and ACQ semantics,
2430 separating the parallel user code called in this parallel region
2431 from the serial user code called after this function returns.
2432 */
2433 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2434
2435#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002436 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002437#endif /* OMP_40_ENABLED */
2438 {
2439 /* Decrement our nested depth level */
2440 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2441 }
2442 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2443
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002444#if OMPT_SUPPORT && OMPT_TRACE
2445 if(ompt_enabled){
2446 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2447 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2448 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2449 parallel_id, task_info->task_id);
2450 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00002451 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002452 task_info->task_id = 0;
2453 }
2454#endif
2455
Jim Cownie5e8470a2013-09-27 10:38:44 +00002456 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2457 0, master_th, team ) );
2458 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002459
Alp Toker98758b02014-03-02 04:12:06 +00002460#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002461 //
2462 // Restore master thread's partition.
2463 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002464 master_th->th.th_first_place = team->t.t_first_place;
2465 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002466#endif /* OMP_40_ENABLED */
2467
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002468 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002469
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002470 if ( root->r.r_active != master_active )
2471 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002472
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002473 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002474
2475 /* this race was fun to find. make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002476 * region otherwise assertions may fail occasionally since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002477 * may be reallocated and the hierarchy appears inconsistent. it is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002478 * actually safe to run and won't cause any bugs, but will cause those
Jim Cownie5e8470a2013-09-27 10:38:44 +00002479 * assertion failures. it's only one deref&assign so might as well put this
2480 * in the critical region */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002481 master_th->th.th_team = parent_team;
2482 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2483 master_th->th.th_team_master = parent_team->t.t_threads[0];
2484 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002485
2486 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002487 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002488 parent_team != master_th->th.th_serial_team &&
2489 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002490 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2491 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002492 }
2493
Jim Cownie5e8470a2013-09-27 10:38:44 +00002494 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002495 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2496 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2497 // Remember master's state if we re-use this nested hot team
2498 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002499 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002500 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002501 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002502 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002503 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002504 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002505 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002506 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002507 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002508
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002509 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2510 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2511 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002512
2513 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2514
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002515#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002516 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002517 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002518 }
2519#endif
2520
Jim Cownie5e8470a2013-09-27 10:38:44 +00002521 KMP_MB();
2522 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2523}
2524
2525/* ------------------------------------------------------------------------ */
2526/* ------------------------------------------------------------------------ */
2527
2528/* Check whether we should push an internal control record onto the
2529 serial team stack. If so, do it. */
2530void
2531__kmp_save_internal_controls ( kmp_info_t * thread )
2532{
2533
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002534 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002535 return;
2536 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002537 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538 int push = 0;
2539
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002540 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002541 push = 1;
2542 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002543 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2544 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002545 push = 1;
2546 }
2547 }
2548 if (push) { /* push a record on the serial team's stack */
2549 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2550
Jim Cownie5e8470a2013-09-27 10:38:44 +00002551 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002552
2553 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2554
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002555 control->next = thread->th.th_team->t.t_control_stack_top;
2556 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002557 }
2558 }
2559}
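/*
   Illustrative sketch of when the push above fires (illustration only; it
   assumes single-thread parallel regions are executed on the serial team, as
   this runtime normally arranges):

       #pragma omp parallel num_threads(1)        // outer region, serialized
       {
           #pragma omp parallel num_threads(1)    // nested: t_serialized > 1
           {
               omp_set_num_threads(4);            // __kmp_save_internal_controls()
                                                  // pushes one record for this
                                                  // serial_nesting_level before
                                                  // the nproc ICV is modified
           }
       }

   A second ICV change at the same serial nesting depth reuses the existing
   record instead of pushing another one.
*/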
2560
2561/* Changes set_nproc */
2562void
2563__kmp_set_num_threads( int new_nth, int gtid )
2564{
2565 kmp_info_t *thread;
2566 kmp_root_t *root;
2567
2568 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2569 KMP_DEBUG_ASSERT( __kmp_init_serial );
2570
2571 if (new_nth < 1)
2572 new_nth = 1;
2573 else if (new_nth > __kmp_max_nth)
2574 new_nth = __kmp_max_nth;
2575
Jonathan Peyton45be4502015-08-11 21:36:41 +00002576 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002577 thread = __kmp_threads[gtid];
2578
2579 __kmp_save_internal_controls( thread );
2580
2581 set__nproc( thread, new_nth );
2582
2583 //
2584 // If this omp_set_num_threads() call will cause the hot team size to be
2585 // reduced (in the absence of a num_threads clause), then reduce it now,
2586 // rather than waiting for the next parallel region.
2587 //
2588 root = thread->th.th_root;
2589 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002590 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2591#if KMP_NESTED_HOT_TEAMS
2592 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2593#endif
2594 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002595 kmp_team_t *hot_team = root->r.r_hot_team;
2596 int f;
2597
2598 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2599
Jim Cownie5e8470a2013-09-27 10:38:44 +00002600 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002601 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2602 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002603 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2604 // When decreasing team size, threads no longer in the team should unref task team.
2605 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2606 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002607 __kmp_free_thread( hot_team->t.t_threads[f] );
2608 hot_team->t.t_threads[f] = NULL;
2609 }
2610 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002611#if KMP_NESTED_HOT_TEAMS
2612 if( thread->th.th_hot_teams ) {
2613 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2614 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2615 }
2616#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002617
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2619
2620 //
2621 // Update the t_nproc field in the threads that are still active.
2622 //
2623 for( f=0 ; f < new_nth; f++ ) {
2624 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2625 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2626 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002627        // Special flag to indicate that the hot team size was changed by an omp_set_num_threads() call
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002628 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002629 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002630}
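/*
   Sketch of the expected call path (assumption for orientation only; the exact
   entry point lives in the FTN/compatibility layer): a user-level call such as

       omp_set_num_threads(4);

   reaches __kmp_set_num_threads() with the caller's gtid.  Besides setting the
   nproc ICV, the code above eagerly shrinks the root's hot team when the new
   value is smaller than the current hot team size and the root is not inside a
   parallel region, so the surplus workers are released now rather than at the
   next fork.
*/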
2631
Jim Cownie5e8470a2013-09-27 10:38:44 +00002632/* Changes max_active_levels */
2633void
2634__kmp_set_max_active_levels( int gtid, int max_active_levels )
2635{
2636 kmp_info_t *thread;
2637
2638 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2639 KMP_DEBUG_ASSERT( __kmp_init_serial );
2640
2641 // validate max_active_levels
2642 if( max_active_levels < 0 ) {
2643 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2644 // We ignore this call if the user has specified a negative value.
2645 // The current setting won't be changed. The last valid setting will be used.
2646 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2647 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2648 return;
2649 }
2650 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2651 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2652 // We allow a zero value. (implementation defined behavior)
2653 } else {
2654 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2655 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2656 // Current upper limit is MAX_INT. (implementation defined behavior)
2657 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
2658        // Actually, the flow should never get here as long as we use the MAX_INT limit.
2659 }
2660 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2661
2662 thread = __kmp_threads[ gtid ];
2663
2664 __kmp_save_internal_controls( thread );
2665
2666 set__max_active_levels( thread, max_active_levels );
2667
2668}
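/*
   Example of the validation above (illustration only): a call equivalent to
   omp_set_max_active_levels(-3) is ignored with a warning and the previous ICV
   value is kept, while any value above KMP_MAX_ACTIVE_LEVELS_LIMIT is clamped
   to that limit before being stored.
*/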
2669
2670/* Gets max_active_levels */
2671int
2672__kmp_get_max_active_levels( int gtid )
2673{
2674 kmp_info_t *thread;
2675
2676 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2677 KMP_DEBUG_ASSERT( __kmp_init_serial );
2678
2679 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002680 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002681 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002682 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2683 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002684}
2685
2686/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2687void
2688__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2689{
2690 kmp_info_t *thread;
2691// kmp_team_t *team;
2692
2693 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2694 KMP_DEBUG_ASSERT( __kmp_init_serial );
2695
2696 // Check if the kind parameter is valid, correct if needed.
2697 // Valid parameters should fit in one of two intervals - standard or extended:
2698 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2699 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2700 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2701 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2702 {
2703 // TODO: Hint needs attention in case we change the default schedule.
2704 __kmp_msg(
2705 kmp_ms_warning,
2706 KMP_MSG( ScheduleKindOutOfRange, kind ),
2707 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2708 __kmp_msg_null
2709 );
2710 kind = kmp_sched_default;
2711 chunk = 0; // ignore chunk value in case of bad kind
2712 }
2713
2714 thread = __kmp_threads[ gtid ];
2715
2716 __kmp_save_internal_controls( thread );
2717
2718 if ( kind < kmp_sched_upper_std ) {
2719 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2720            // differentiate static chunked vs. unchunked:
2721 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002722 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002723 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002724 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002725 }
2726 } else {
2727 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002728 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002729 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2730 }
2731 if ( kind == kmp_sched_auto ) {
2732 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002733 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002734 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002735 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002736 }
2737}
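/*
   Worked examples for the mapping above (illustration only; the internal
   sched_type values come from __kmp_sch_map and kmp.h, and KMP_DEFAULT_CHUNK
   is assumed positive):

       __kmp_set_schedule( gtid, kmp_sched_static, 0 );
           // chunk below KMP_DEFAULT_CHUNK => plain (unchunked) kmp_sch_static
       __kmp_set_schedule( gtid, kmp_sched_auto, 100 );
           // chunk argument ignored; chunk ICV reset to KMP_DEFAULT_CHUNK

   An out-of-range kind falls back to kmp_sched_default with chunk 0 after the
   ScheduleKindOutOfRange warning is issued.
*/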
2738
2739/* Gets def_sched_var ICV values */
2740void
2741__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2742{
2743 kmp_info_t *thread;
2744 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002745
2746 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2747 KMP_DEBUG_ASSERT( __kmp_init_serial );
2748
2749 thread = __kmp_threads[ gtid ];
2750
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002751 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002752
2753 switch ( th_type ) {
2754 case kmp_sch_static:
2755 case kmp_sch_static_greedy:
2756 case kmp_sch_static_balanced:
2757 *kind = kmp_sched_static;
2758        *chunk = 0;   // chunk was not set; report this with a zero value
2759 return;
2760 case kmp_sch_static_chunked:
2761 *kind = kmp_sched_static;
2762 break;
2763 case kmp_sch_dynamic_chunked:
2764 *kind = kmp_sched_dynamic;
2765 break;
2766 case kmp_sch_guided_chunked:
2767 case kmp_sch_guided_iterative_chunked:
2768 case kmp_sch_guided_analytical_chunked:
2769 *kind = kmp_sched_guided;
2770 break;
2771 case kmp_sch_auto:
2772 *kind = kmp_sched_auto;
2773 break;
2774 case kmp_sch_trapezoidal:
2775 *kind = kmp_sched_trapezoidal;
2776 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002777#if KMP_STATIC_STEAL_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002778 case kmp_sch_static_steal:
2779 *kind = kmp_sched_static_steal;
2780 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002781#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002782 default:
2783 KMP_FATAL( UnknownSchedulingType, th_type );
2784 }
2785
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002786 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002787}
2788
2789int
2790__kmp_get_ancestor_thread_num( int gtid, int level ) {
2791
2792 int ii, dd;
2793 kmp_team_t *team;
2794 kmp_info_t *thr;
2795
2796 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2797 KMP_DEBUG_ASSERT( __kmp_init_serial );
2798
2799 // validate level
2800 if( level == 0 ) return 0;
2801 if( level < 0 ) return -1;
2802 thr = __kmp_threads[ gtid ];
2803 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002804 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002805 if( level > ii ) return -1;
2806
2807#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002808 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002809 // AC: we are in teams region where multiple nested teams have same level
2810 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2811 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2812 KMP_DEBUG_ASSERT( ii >= tlevel );
2813            // AC: since we need to pass through the teams league, artificially increase ii
2814 if ( ii == tlevel ) {
2815 ii += 2; // three teams have same level
2816 } else {
2817 ii ++; // two teams have same level
2818 }
2819 }
2820 }
2821#endif
2822
2823 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2824
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002825 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002826 level++;
2827 while( ii > level )
2828 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002829 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002830 {
2831 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002832 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002833 team = team->t.t_parent;
2834 continue;
2835 }
2836 if( ii > level ) {
2837 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002838 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002839 ii--;
2840 }
2841 }
2842
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002843 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002844}
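/*
   Worked example (illustration only): for a thread whose current team is at
   nesting level 2, __kmp_get_ancestor_thread_num( gtid, 2 ) returns the
   caller's own tid, level 0 always returns 0 (the initial thread), and any
   level greater than the current t_level returns -1.  The ii adjustment above
   only matters inside a teams construct, where several nested teams share the
   same t_level.
*/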
2845
2846int
2847__kmp_get_team_size( int gtid, int level ) {
2848
2849 int ii, dd;
2850 kmp_team_t *team;
2851 kmp_info_t *thr;
2852
2853 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2854 KMP_DEBUG_ASSERT( __kmp_init_serial );
2855
2856 // validate level
2857 if( level == 0 ) return 1;
2858 if( level < 0 ) return -1;
2859 thr = __kmp_threads[ gtid ];
2860 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002861 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002862 if( level > ii ) return -1;
2863
2864#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002865 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002866 // AC: we are in teams region where multiple nested teams have same level
2867 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2868 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2869 KMP_DEBUG_ASSERT( ii >= tlevel );
2870            // AC: since we need to pass through the teams league, artificially increase ii
2871 if ( ii == tlevel ) {
2872 ii += 2; // three teams have same level
2873 } else {
2874 ii ++; // two teams have same level
2875 }
2876 }
2877 }
2878#endif
2879
2880 while( ii > level )
2881 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002882 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002883 {
2884 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002885 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002886 team = team->t.t_parent;
2887 continue;
2888 }
2889 if( ii > level ) {
2890 team = team->t.t_parent;
2891 ii--;
2892 }
2893 }
2894
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002895 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002896}
2897
Jim Cownie5e8470a2013-09-27 10:38:44 +00002898kmp_r_sched_t
2899__kmp_get_schedule_global() {
2900// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
2901// may be changed by kmp_set_defaults independently. So one can get the updated schedule here.
2902
2903 kmp_r_sched_t r_sched;
2904
2905 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2906 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2907 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2908 if ( __kmp_sched == kmp_sch_static ) {
2909 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2910 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2911 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2912 } else {
2913 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2914 }
2915
2916 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2917 r_sched.chunk = KMP_DEFAULT_CHUNK;
2918 } else {
2919 r_sched.chunk = __kmp_chunk;
2920 }
2921
2922 return r_sched;
2923}
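/*
   Example of the substitution above (illustration only, assuming the settings
   code stores kmp_sch_guided_chunked in __kmp_sched for OMP_SCHEDULE=guided):
   in that case the returned r_sched_type is whatever __kmp_guided currently
   selects (iterative or analytical guided), and the chunk typically falls back
   to KMP_DEFAULT_CHUNK because no explicit chunk was given.
*/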
2924
2925/* ------------------------------------------------------------------------ */
2926/* ------------------------------------------------------------------------ */
2927
2928
2929/*
2930 * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
2931 * at least argc number of *t_argv entries for the requested team.
2932 */
2933static void
2934__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2935{
2936
2937 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002938 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002939
2940 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2941 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002942        /* if heap space was previously allocated for args, free it */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002943 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2944 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002945
2946 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2947 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002948 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002949 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2950 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002951 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002952 if ( __kmp_storage_map ) {
2953 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2954 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2955 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2956 "team_%d.t_inline_argv",
2957 team->t.t_id );
2958 }
2959 } else {
2960 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002961 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002962 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2963 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2964 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002965 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002966 if ( __kmp_storage_map ) {
2967 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2968 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2969 team->t.t_id );
2970 }
2971 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002972 }
2973}
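/*
   Sizing example for the allocation above (illustration only): a request with
   argc <= KMP_INLINE_ARGV_ENTRIES reuses the inline storage in the team
   structure; a larger request allocates roughly
   max( KMP_MIN_MALLOC_ARGV_ENTRIES, 2 * argc ) heap entries, so repeated small
   increases in argc do not force a reallocation on every fork.
*/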
2974
2975static void
2976__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2977{
2978 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002979 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002980 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2981 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002982 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002983 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002984 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002985 team->t.t_max_nproc = max_nth;
2986
2987 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002988 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002989 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002990#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00002991 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2992#endif
2993 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002994}
2995
2996static void
2997__kmp_free_team_arrays(kmp_team_t *team) {
2998 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2999 int i;
3000 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
3001 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
3002 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
3003 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
3004 }; // if
3005 }; // for
3006 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00003007 __kmp_free(team->t.t_disp_buffer);
3008 __kmp_free(team->t.t_dispatch);
3009 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003010 team->t.t_threads = NULL;
3011 team->t.t_disp_buffer = NULL;
3012 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003013 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003014}
3015
3016static void
3017__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3018 kmp_info_t **oldThreads = team->t.t_threads;
3019
Jonathan Peytona58563d2016-03-29 20:05:27 +00003020 __kmp_free(team->t.t_disp_buffer);
3021 __kmp_free(team->t.t_dispatch);
3022 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003023 __kmp_allocate_team_arrays(team, max_nth);
3024
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003025 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003026
3027 __kmp_free(oldThreads);
3028}
3029
3030static kmp_internal_control_t
3031__kmp_get_global_icvs( void ) {
3032
Jim Cownie5e8470a2013-09-27 10:38:44 +00003033 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003034
3035#if OMP_40_ENABLED
3036 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3037#endif /* OMP_40_ENABLED */
3038
3039 kmp_internal_control_t g_icvs = {
3040 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003041 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3042 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3043 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003044 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003045#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00003046 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003047#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003048 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3049 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003050 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3051 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003052#if OMP_40_ENABLED
3053 __kmp_nested_proc_bind.bind_types[0],
George Rokos28f31b42016-09-09 17:55:26 +00003054 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003055#endif /* OMP_40_ENABLED */
3056 NULL //struct kmp_internal_control *next;
3057 };
3058
3059 return g_icvs;
3060}
3061
3062static kmp_internal_control_t
3063__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3064
Jim Cownie5e8470a2013-09-27 10:38:44 +00003065 kmp_internal_control_t gx_icvs;
3066    gx_icvs.serial_nesting_level = 0; // probably = team->t.t_serialized, as in __kmp_save_internal_controls
3067 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3068 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003069
3070 return gx_icvs;
3071}
3072
3073static void
3074__kmp_initialize_root( kmp_root_t *root )
3075{
3076 int f;
3077 kmp_team_t *root_team;
3078 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003079 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003080 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3081 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003082 KMP_DEBUG_ASSERT( root );
3083 KMP_ASSERT( ! root->r.r_begin );
3084
3085 /* setup the root state structure */
3086 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003087 root->r.r_begin = FALSE;
3088 root->r.r_active = FALSE;
3089 root->r.r_in_parallel = 0;
3090 root->r.r_blocktime = __kmp_dflt_blocktime;
3091 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003092
3093 /* setup the root team for this task */
3094 /* allocate the root team structure */
3095 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003096
Jim Cownie5e8470a2013-09-27 10:38:44 +00003097 root_team =
3098 __kmp_allocate_team(
3099 root,
3100 1, // new_nproc
3101 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003102#if OMPT_SUPPORT
3103 0, // root parallel id
3104#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003105#if OMP_40_ENABLED
3106 __kmp_nested_proc_bind.bind_types[0],
3107#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003108 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003109 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003110 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003111 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003112#if USE_DEBUGGER
3113 // Non-NULL value should be assigned to make the debugger display the root team.
3114 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3115#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003116
3117 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3118
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003119 root->r.r_root_team = root_team;
3120 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003121
3122 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003123 root_team->t.t_threads[0] = NULL;
3124 root_team->t.t_nproc = 1;
3125 root_team->t.t_serialized = 1;
3126 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3127 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3128 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003129 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3130 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3131
3132 /* setup the hot team for this task */
3133 /* allocate the hot team structure */
3134 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003135
Jim Cownie5e8470a2013-09-27 10:38:44 +00003136 hot_team =
3137 __kmp_allocate_team(
3138 root,
3139 1, // new_nproc
3140 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003141#if OMPT_SUPPORT
3142 0, // root parallel id
3143#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003144#if OMP_40_ENABLED
3145 __kmp_nested_proc_bind.bind_types[0],
3146#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003147 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003148 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003149 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003150 );
3151 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3152
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003153 root->r.r_hot_team = hot_team;
3154 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003155
3156 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003157 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003158
3159 /* initialize hot team */
3160 hot_team_max_nth = hot_team->t.t_max_nproc;
3161 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003162 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003163 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003164 hot_team->t.t_nproc = 1;
3165 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3166 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3167 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003168 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003169}
3170
3171#ifdef KMP_DEBUG
3172
3173
3174typedef struct kmp_team_list_item {
3175 kmp_team_p const * entry;
3176 struct kmp_team_list_item * next;
3177} kmp_team_list_item_t;
3178typedef kmp_team_list_item_t * kmp_team_list_t;
3179
3180
3181static void
3182__kmp_print_structure_team_accum( // Add team to list of teams.
3183 kmp_team_list_t list, // List of teams.
3184 kmp_team_p const * team // Team to add.
3185) {
3186
3187 // List must terminate with item where both entry and next are NULL.
3188 // Team is added to the list only once.
3189 // List is sorted in ascending order by team id.
3190 // Team id is *not* a key.
3191
3192 kmp_team_list_t l;
3193
3194 KMP_DEBUG_ASSERT( list != NULL );
3195 if ( team == NULL ) {
3196 return;
3197 }; // if
3198
3199 __kmp_print_structure_team_accum( list, team->t.t_parent );
3200 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3201
3202 // Search list for the team.
3203 l = list;
3204 while ( l->next != NULL && l->entry != team ) {
3205 l = l->next;
3206 }; // while
3207 if ( l->next != NULL ) {
3208 return; // Team has been added before, exit.
3209 }; // if
3210
3211 // Team is not found. Search list again for insertion point.
3212 l = list;
3213 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3214 l = l->next;
3215 }; // while
3216
3217 // Insert team.
3218 {
3219 kmp_team_list_item_t * item =
3220 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3221 * item = * l;
3222 l->entry = team;
3223 l->next = item;
3224 }
3225
3226}
3227
3228static void
3229__kmp_print_structure_team(
3230 char const * title,
3231 kmp_team_p const * team
3232
3233) {
3234 __kmp_printf( "%s", title );
3235 if ( team != NULL ) {
3236 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3237 } else {
3238 __kmp_printf( " - (nil)\n" );
3239 }; // if
3240}
3241
3242static void
3243__kmp_print_structure_thread(
3244 char const * title,
3245 kmp_info_p const * thread
3246
3247) {
3248 __kmp_printf( "%s", title );
3249 if ( thread != NULL ) {
3250 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3251 } else {
3252 __kmp_printf( " - (nil)\n" );
3253 }; // if
3254}
3255
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003256void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003257__kmp_print_structure(
3258 void
3259) {
3260
3261 kmp_team_list_t list;
3262
3263 // Initialize list of teams.
3264 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3265 list->entry = NULL;
3266 list->next = NULL;
3267
3268 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3269 {
3270 int gtid;
3271 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3272 __kmp_printf( "%2d", gtid );
3273 if ( __kmp_threads != NULL ) {
3274 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3275 }; // if
3276 if ( __kmp_root != NULL ) {
3277 __kmp_printf( " %p", __kmp_root[ gtid ] );
3278 }; // if
3279 __kmp_printf( "\n" );
3280 }; // for gtid
3281 }
3282
3283 // Print out __kmp_threads array.
3284 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3285 if ( __kmp_threads != NULL ) {
3286 int gtid;
3287 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3288 kmp_info_t const * thread = __kmp_threads[ gtid ];
3289 if ( thread != NULL ) {
3290 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3291 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3292 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3293 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3294 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3295 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3296 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3297 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3298#if OMP_40_ENABLED
3299 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3300#endif
3301 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3302 __kmp_printf( "\n" );
3303 __kmp_print_structure_team_accum( list, thread->th.th_team );
3304 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3305 }; // if
3306 }; // for gtid
3307 } else {
3308 __kmp_printf( "Threads array is not allocated.\n" );
3309 }; // if
3310
3311 // Print out __kmp_root array.
3312 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3313 if ( __kmp_root != NULL ) {
3314 int gtid;
3315 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3316 kmp_root_t const * root = __kmp_root[ gtid ];
3317 if ( root != NULL ) {
3318 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3319 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3320 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3321 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3322 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3323 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3324 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3325 __kmp_printf( "\n" );
3326 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3327 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3328 }; // if
3329 }; // for gtid
3330 } else {
3331 __kmp_printf( "Ubers array is not allocated.\n" );
3332 }; // if
3333
3334 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3335 while ( list->next != NULL ) {
3336 kmp_team_p const * team = list->entry;
3337 int i;
3338 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3339 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3340 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3341 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3342 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3343 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3344 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3345 __kmp_printf( " Thread %2d: ", i );
3346 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3347 }; // for i
3348 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3349 __kmp_printf( "\n" );
3350 list = list->next;
3351 }; // while
3352
3353 // Print out __kmp_thread_pool and __kmp_team_pool.
3354 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3355 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3356 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3357 __kmp_printf( "\n" );
3358
3359 // Free team list.
3360 while ( list != NULL ) {
3361 kmp_team_list_item_t * item = list;
3362 list = list->next;
3363 KMP_INTERNAL_FREE( item );
3364 }; // while
3365
3366}
3367
3368#endif
3369
3370
3371//---------------------------------------------------------------------------
3372// Stuff for per-thread fast random number generator
3373// Table of primes
3374
3375static const unsigned __kmp_primes[] = {
3376 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3377 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3378 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3379 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3380 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3381 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3382 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3383 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3384 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3385 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3386 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3387 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3388 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3389 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3390 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3391 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3392};
3393
3394//---------------------------------------------------------------------------
3395// __kmp_get_random: Get a random number using a linear congruential method.
3396
3397unsigned short
3398__kmp_get_random( kmp_info_t * thread )
3399{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003400 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401 unsigned short r = x>>16;
3402
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003403 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003404
3405 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3406 thread->th.th_info.ds.ds_tid, r) );
3407
3408 return r;
3409}
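/*
   The generator above is a per-thread 32-bit linear congruential recurrence,

       x_{n+1} = a * x_n + 1   (mod 2^32),

   with the multiplier 'a' picked from __kmp_primes in __kmp_init_random()
   below.  Only the high 16 bits of x are returned, since the low-order bits of
   a power-of-two-modulus LCG have short periods.
*/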
3410//--------------------------------------------------------
3411// __kmp_init_random: Initialize a random number generator
3412
3413void
3414__kmp_init_random( kmp_info_t * thread )
3415{
3416 unsigned seed = thread->th.th_info.ds.ds_tid;
3417
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003418 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3419 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3420 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003421}
3422
3423
3424#if KMP_OS_WINDOWS
3425/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3426static int
3427__kmp_reclaim_dead_roots(void) {
3428 int i, r = 0;
3429
3430 for(i = 0; i < __kmp_threads_capacity; ++i) {
3431 if( KMP_UBER_GTID( i ) &&
3432 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3433 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
3434 r += __kmp_unregister_root_other_thread(i);
3435 }
3436 }
3437 return r;
3438}
3439#endif
3440
3441/*
3442 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3443 free entries generated.
3444
3445 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3446 already dead.
3447
3448 On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate
3449 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3450 __kmp_tp_capacity, if threadprivate cache array has been created.
3451 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3452
3453 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3454 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3455 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3456 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3457 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3458 as many free slots as possible up to nWish.
3459
3460 If any argument is negative, the behavior is undefined.
3461*/
3462static int
3463__kmp_expand_threads(int nWish, int nNeed) {
3464 int added = 0;
3465 int old_tp_cached;
3466 int __kmp_actual_max_nth;
3467
3468 if(nNeed > nWish) /* normalize the arguments */
3469 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003470#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003471/* only for Windows static library */
3472 /* reclaim array entries for root threads that are already dead */
3473 added = __kmp_reclaim_dead_roots();
3474
3475 if(nNeed) {
3476 nNeed -= added;
3477 if(nNeed < 0)
3478 nNeed = 0;
3479 }
3480 if(nWish) {
3481 nWish -= added;
3482 if(nWish < 0)
3483 nWish = 0;
3484 }
3485#endif
3486 if(nWish <= 0)
3487 return added;
3488
3489 while(1) {
3490 int nTarget;
3491 int minimumRequiredCapacity;
3492 int newCapacity;
3493 kmp_info_t **newThreads;
3494 kmp_root_t **newRoot;
3495
3496 //
3497 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3498 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3499 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3500 // become > __kmp_max_nth in one of two ways:
3501 //
3502 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3503        // may not be reused by another thread, so we may need to increase
3504 // __kmp_threads_capacity to __kmp_max_threads + 1.
3505 //
3506 // 2) New foreign root(s) are encountered. We always register new
3507 // foreign roots. This may cause a smaller # of threads to be
3508 // allocated at subsequent parallel regions, but the worker threads
3509 // hang around (and eventually go to sleep) and need slots in the
3510 // __kmp_threads[] array.
3511 //
3512 // Anyway, that is the reason for moving the check to see if
3513        // __kmp_max_threads was exceeded into __kmp_reserve_threads()
3514 // instead of having it performed here. -BB
3515 //
3516 old_tp_cached = __kmp_tp_cached;
3517 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3518 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3519
3520 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3521 nTarget = nWish;
3522 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3523 /* can't fulfil nWish, so try nNeed */
3524 if(nNeed) {
3525 nTarget = nNeed;
3526 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3527 /* possible expansion too small -- give up */
3528 break;
3529 }
3530 } else {
3531 /* best-effort */
3532 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3533 if(!nTarget) {
3534                    /* can't expand at all -- give up */
3535 break;
3536 }
3537 }
3538 }
3539 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3540
3541 newCapacity = __kmp_threads_capacity;
3542 do{
3543 newCapacity =
3544 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3545 (newCapacity << 1) :
3546 __kmp_actual_max_nth;
3547 } while(newCapacity < minimumRequiredCapacity);
3548 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3549 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003550 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3551 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003552 memset(newThreads + __kmp_threads_capacity, 0,
3553 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3554 memset(newRoot + __kmp_threads_capacity, 0,
3555 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3556
3557 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3558 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3559 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3560 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3561 of a double-check pair.
3562 */
3563 __kmp_free(newThreads);
3564 continue; /* start over and try again */
3565 }
3566 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3567 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3568 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3569 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3570 __kmp_free(newThreads);
3571 continue; /* start over and try again */
3572 } else {
3573 /* success */
3574            // __kmp_free( __kmp_threads ); // ATT: It leads to a crash. Needs to be investigated.
3575 //
3576 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3577 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3578 added += newCapacity - __kmp_threads_capacity;
3579 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3580 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003581 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003582 }
3583 }
3584 return added;
3585}
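/*
   Growth example for the loop above (illustration only): with
   __kmp_threads_capacity == 32 and nWish == 1, newCapacity doubles (32 -> 64)
   until it reaches minimumRequiredCapacity == 33, then __kmp_threads and
   __kmp_root are copied into a single combined allocation of the new size.
   The __kmp_tp_cached double-check only restarts the expansion when a
   threadprivate cache appeared concurrently and capped the usable capacity.
*/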
3586
3587/* register the current thread as a root thread and obtain our gtid */
3588/* we must have the __kmp_initz_lock held at this point */
3589/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
3590int
3591__kmp_register_root( int initial_thread )
3592{
3593 kmp_info_t *root_thread;
3594 kmp_root_t *root;
3595 int gtid;
3596 int capacity;
3597 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3598 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3599 KMP_MB();
3600
3601
3602 /*
3603 2007-03-02:
3604
3605        If the initial thread has not invoked the OpenMP RTL yet, and this thread is not an initial one,
3606        the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
3607        return false (meaning there is at least one empty slot in the __kmp_threads array), but it
3608        is possible that the only free slot is #0, which is reserved for the initial thread and so cannot be
3609        used for this one. The following code works around this bug.
3610
3611        However, the right solution seems to be not reserving slot #0 for the initial thread, because:
3612        (1) there is no magic in slot #0,
3613        (2) we cannot detect the initial thread reliably (the first thread which does serial
3614        initialization may not be a real initial thread).
3615 */
3616 capacity = __kmp_threads_capacity;
3617 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3618 -- capacity;
3619 }; // if
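    /*
       Example of the adjustment above (illustration only): with
       __kmp_threads_capacity == 4 and only slot #0 free, a foreign (non-initial)
       thread registering as a root sees capacity == 3, so the "too many threads"
       check below triggers expansion instead of handing out the reserved slot.
    */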
3620
3621 /* see if there are too many threads */
3622 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3623 if ( __kmp_tp_cached ) {
3624 __kmp_msg(
3625 kmp_ms_fatal,
3626 KMP_MSG( CantRegisterNewThread ),
3627 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3628 KMP_HNT( PossibleSystemLimitOnThreads ),
3629 __kmp_msg_null
3630 );
3631 }
3632 else {
3633 __kmp_msg(
3634 kmp_ms_fatal,
3635 KMP_MSG( CantRegisterNewThread ),
3636 KMP_HNT( SystemLimitOnThreads ),
3637 __kmp_msg_null
3638 );
3639 }
3640 }; // if
3641
3642 /* find an available thread slot */
3643 /* Don't reassign the zero slot since we need that to only be used by initial
3644 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003645 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3646 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003647 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3648 KMP_ASSERT( gtid < __kmp_threads_capacity );
3649
3650 /* update global accounting */
3651 __kmp_all_nth ++;
3652 TCW_4(__kmp_nth, __kmp_nth + 1);
3653
3654 //
3655 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3656 // for low numbers of procs, and method #2 (keyed API call) for higher
3657 // numbers of procs.
3658 //
3659 if ( __kmp_adjust_gtid_mode ) {
3660 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3661 if ( TCR_4(__kmp_gtid_mode) != 2) {
3662 TCW_4(__kmp_gtid_mode, 2);
3663 }
3664 }
3665 else {
3666 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3667 TCW_4(__kmp_gtid_mode, 1);
3668 }
3669 }
3670 }
3671
3672#ifdef KMP_ADJUST_BLOCKTIME
3673 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003674 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003675 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3676 if ( __kmp_nth > __kmp_avail_proc ) {
3677 __kmp_zero_bt = TRUE;
3678 }
3679 }
3680#endif /* KMP_ADJUST_BLOCKTIME */
3681
3682 /* setup this new hierarchy */
3683 if( ! ( root = __kmp_root[gtid] )) {
3684 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3685 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3686 }
3687
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003688#if KMP_STATS_ENABLED
3689 // Initialize stats as soon as possible (right after gtid assignment).
3690 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3691 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3692 KMP_SET_THREAD_STATE(SERIAL_REGION);
3693 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3694#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003695 __kmp_initialize_root( root );
3696
3697 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003698 if( root->r.r_uber_thread ) {
3699 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003700 } else {
3701 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3702 if ( __kmp_storage_map ) {
3703 __kmp_print_thread_storage_map( root_thread, gtid );
3704 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003705 root_thread->th.th_info .ds.ds_gtid = gtid;
3706 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003707 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003708 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003709 }
3710 #if USE_FAST_MEMORY
3711 __kmp_initialize_fast_memory( root_thread );
3712 #endif /* USE_FAST_MEMORY */
3713
3714 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003715 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716 __kmp_initialize_bget( root_thread );
3717 #endif
3718 __kmp_init_random( root_thread ); // Initialize random number generator
3719 }
3720
3721 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003722 if( ! root_thread->th.th_serial_team ) {
3723 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003724 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003725
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003726 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003727#if OMPT_SUPPORT
3728 0, // root parallel id
3729#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003730#if OMP_40_ENABLED
3731 proc_bind_default,
3732#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003733 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003734 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003735 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003736 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003737 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003738 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003739
3740 /* drop root_thread into place */
3741 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3742
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003743 root->r.r_root_team->t.t_threads[0] = root_thread;
3744 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3745 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3746 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
3747 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003748
3749 /* initialize the thread, get it ready to go */
3750 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003751 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003752
3753 /* prepare the master thread for get_gtid() */
3754 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003755
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003756#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003757 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003758#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003759
Jim Cownie5e8470a2013-09-27 10:38:44 +00003760 #ifdef KMP_TDATA_GTID
3761 __kmp_gtid = gtid;
3762 #endif
3763 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3764 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003765
3766 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3767 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003768 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003769 KMP_INIT_BARRIER_STATE ) );
3770 { // Initialize barrier data.
3771 int b;
3772 for ( b = 0; b < bs_last_barrier; ++ b ) {
3773 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003774#if USE_DEBUGGER
3775 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3776#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003777 }; // for
3778 }
3779 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3780
Alp Toker763b9392014-02-28 09:42:41 +00003781#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003782# if OMP_40_ENABLED
3783 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3784 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3785 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3786 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3787# endif
3788
Jim Cownie5e8470a2013-09-27 10:38:44 +00003789 if ( TCR_4(__kmp_init_middle) ) {
3790 __kmp_affinity_set_init_mask( gtid, TRUE );
3791 }
Alp Toker763b9392014-02-28 09:42:41 +00003792#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003793
3794 __kmp_root_counter ++;
3795
3796 KMP_MB();
3797 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3798
3799 return gtid;
3800}
3801
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003802#if KMP_NESTED_HOT_TEAMS
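// Free the nested hot team kept by thread 'thr' at the given nesting level,
// recursing into deeper levels first. Returns the number of threads released
// from those teams (team masters are not freed and are not counted).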
3803static int
3804__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3805{
3806 int i, n, nth;
3807 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3808 if( !hot_teams || !hot_teams[level].hot_team ) {
3809 return 0;
3810 }
3811 KMP_DEBUG_ASSERT( level < max_level );
3812 kmp_team_t *team = hot_teams[level].hot_team;
3813 nth = hot_teams[level].hot_team_nth;
3814 n = nth - 1; // master is not freed
3815 if( level < max_level - 1 ) {
3816 for( i = 0; i < nth; ++i ) {
3817 kmp_info_t *th = team->t.t_threads[i];
3818 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3819 if( i > 0 && th->th.th_hot_teams ) {
3820 __kmp_free( th->th.th_hot_teams );
3821 th->th.th_hot_teams = NULL;
3822 }
3823 }
3824 }
3825 __kmp_free_team( root, team, NULL );
3826 return n;
3827}
3828#endif
3829
Jim Cownie5e8470a2013-09-27 10:38:44 +00003830/* Resets a root thread and clears its root and hot teams.
3831 Returns the number of __kmp_threads entries directly and indirectly freed.
3832*/
3833static int
3834__kmp_reset_root(int gtid, kmp_root_t *root)
3835{
3836 kmp_team_t * root_team = root->r.r_root_team;
3837 kmp_team_t * hot_team = root->r.r_hot_team;
3838 int n = hot_team->t.t_nproc;
3839 int i;
3840
3841 KMP_DEBUG_ASSERT( ! root->r.r_active );
3842
3843 root->r.r_root_team = NULL;
3844 root->r.r_hot_team = NULL;
3845 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before the call
3846 // to __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003847 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3848#if KMP_NESTED_HOT_TEAMS
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003849 if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003850 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3851 kmp_info_t *th = hot_team->t.t_threads[i];
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003852 if( __kmp_hot_teams_max_level > 1 ) {
3853 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3854 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003855 if( th->th.th_hot_teams ) {
3856 __kmp_free( th->th.th_hot_teams );
3857 th->th.th_hot_teams = NULL;
3858 }
3859 }
3860 }
3861#endif
3862 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003863
Jim Cownie5e8470a2013-09-27 10:38:44 +00003864 //
3865 // Before we can reap the thread, we need to make certain that all
3866 // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
3867 //
3868 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3869 __kmp_wait_to_unref_task_teams();
3870 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003871
3872 #if KMP_OS_WINDOWS
3873 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3874 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3875 (LPVOID)&(root->r.r_uber_thread->th),
3876 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3877 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3878 #endif /* KMP_OS_WINDOWS */
3879
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003880#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003881 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003882 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3883 int gtid = __kmp_get_gtid();
3884 __ompt_thread_end(ompt_thread_initial, gtid);
3885 }
3886#endif
3887
Jim Cownie5e8470a2013-09-27 10:38:44 +00003888 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3889 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3890
3891 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3892 root->r.r_uber_thread = NULL;
3893 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003894 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003895
3896 return n;
3897}
3898
3899void
3900__kmp_unregister_root_current_thread( int gtid )
3901{
Jim Cownie77c2a632014-09-03 11:34:33 +00003902 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003903 /* this lock should be ok, since unregister_root_current_thread is never called during
3904 * an abort, only during a normal close. Furthermore, if you have the
3905 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003906
3907 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3908 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3909 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3910 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3911 return;
3912 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003913 kmp_root_t *root = __kmp_root[gtid];
3914
Jim Cownie5e8470a2013-09-27 10:38:44 +00003915 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3916 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3917 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3918 KMP_ASSERT( root->r.r_active == FALSE );
3919
Jim Cownie5e8470a2013-09-27 10:38:44 +00003920
3921 KMP_MB();
3922
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003923#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003924 kmp_info_t * thread = __kmp_threads[gtid];
3925 kmp_team_t * team = thread->th.th_team;
3926 kmp_task_team_t * task_team = thread->th.th_task_team;
3927
3928 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003929 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3930#if OMPT_SUPPORT
3931 // the runtime is shutting down so we won't report any events
3932 thread->th.ompt_thread_info.state = ompt_state_undefined;
3933#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003934 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003935 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003936#endif
3937
Jim Cownie5e8470a2013-09-27 10:38:44 +00003938 __kmp_reset_root(gtid, root);
3939
3940 /* free up this thread slot */
3941 __kmp_gtid_set_specific( KMP_GTID_DNE );
3942#ifdef KMP_TDATA_GTID
3943 __kmp_gtid = KMP_GTID_DNE;
3944#endif
3945
3946 KMP_MB();
3947 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3948
3949 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3950}
3951
Jonathan Peyton2321d572015-06-08 19:25:25 +00003952#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003953/* __kmp_forkjoin_lock must be already held
3954 Unregisters a root thread that is not the current thread. Returns the number of
3955 __kmp_threads entries freed as a result.
3956 */
3957static int
3958__kmp_unregister_root_other_thread( int gtid )
3959{
3960 kmp_root_t *root = __kmp_root[gtid];
3961 int r;
3962
3963 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3964 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3965 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3966 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3967 KMP_ASSERT( root->r.r_active == FALSE );
3968
3969 r = __kmp_reset_root(gtid, root);
3970 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3971 return r;
3972}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003973#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003974
Jim Cownie5e8470a2013-09-27 10:38:44 +00003975#if KMP_DEBUG
3976void __kmp_task_info() {
3977
3978 kmp_int32 gtid = __kmp_entry_gtid();
3979 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3980 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003981 kmp_team_t *steam = this_thr->th.th_serial_team;
3982 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003983
3984 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3985 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3986}
3987#endif // KMP_DEBUG
3988
Jim Cownie5e8470a2013-09-27 10:38:44 +00003989/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003990 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003991 * initialization of features as much as possible */
3992static void
3993__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3994{
3995 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3996 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003997 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003998 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003999 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004000 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004001 KMP_DEBUG_ASSERT( team->t.t_threads );
4002 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4003 KMP_DEBUG_ASSERT( master );
4004 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004005
4006 KMP_MB();
4007
4008 TCW_SYNC_PTR(this_thr->th.th_team, team);
4009
4010 this_thr->th.th_info.ds.ds_tid = tid;
4011 this_thr->th.th_set_nproc = 0;
Andrey Churbanov581490e2017-02-06 18:53:32 +00004012 if (__kmp_tasking_mode != tskm_immediate_exec)
4013 // When tasking is possible, threads are not safe to reap until they are
4014 // done tasking; this will be set when tasking code is exited in wait
4015 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4016 else // no tasking --> always safe to reap
4017 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004018#if OMP_40_ENABLED
4019 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00004020# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004021 this_thr->th.th_new_place = this_thr->th.th_current_place;
4022# endif
4023#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004024 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004025
4026 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004027 this_thr->th.th_team_nproc = team->t.t_nproc;
4028 this_thr->th.th_team_master = master;
4029 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004030 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4031
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004032 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004033
4034 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4035 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4036
4037 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4038
4039 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4040 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4041 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004042
4043 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004044 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004045
4046 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004047
4048#ifdef BUILD_TV
4049 this_thr->th.th_local.tv_data = 0;
4050#endif
4051
4052 if ( ! this_thr->th.th_pri_common ) {
4053 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4054 if ( __kmp_storage_map ) {
4055 __kmp_print_storage_map_gtid(
4056 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4057 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4058 );
4059 }; // if
4060 this_thr->th.th_pri_head = NULL;
4061 }; // if
4062
4063 /* Initialize dynamic dispatch */
4064 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004065 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004066 /*
4067 * Use team max_nproc since this will never change for the team.
4068 */
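        // A serialized team (t_max_nproc == 1) needs only a single buffer; otherwise
        // __kmp_dispatch_num_buffers buffers are allocated and cycled through, one per
        // dynamically scheduled loop, so back-to-back loops need not share a buffer.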
4069 size_t disp_size = sizeof( dispatch_private_info_t ) *
Jonathan Peyton067325f2016-05-31 19:01:15 +00004070 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004071 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4072 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004073 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004074 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4075
4076 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004077#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00004078 dispatch->th_doacross_buf_idx = 0;
4079#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004080 if( ! dispatch->th_disp_buffer ) {
4081 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004082
4083 if ( __kmp_storage_map ) {
4084 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
Jonathan Peyton067325f2016-05-31 19:01:15 +00004085 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
Jim Cownie5e8470a2013-09-27 10:38:44 +00004086 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4087 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4088 gtid, team->t.t_id, gtid );
4089 }
4090 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004091 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004092 }
4093
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004094 dispatch->th_dispatch_pr_current = 0;
4095 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004096
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004097 dispatch->th_deo_fcn = 0; /* ORDERED */
4098 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004099 }
4100
4101 this_thr->th.th_next_pool = NULL;
4102
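    // The memo stack records th_task_state for each level of nested parallelism so
    // it can be restored as the nesting unwinds; start with room for 4 levels (the
    // stack is grown elsewhere if deeper nesting is encountered).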
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004103 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004104 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004105 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4106 this_thr->th.th_task_state_top = 0;
4107 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004108 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4109 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004110 }
4111
Jim Cownie5e8470a2013-09-27 10:38:44 +00004112 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4113 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4114
4115 KMP_MB();
4116}
4117
4118
4119/* allocate a new thread for the requesting team. this is only called from within a
4120 * forkjoin critical section. we will first try to get an available thread from the
4121 * thread pool. if none is available, we will fork a new one assuming we are able
4122 * to create a new one. this should be assured, as the caller should check on this
4123 * first.
4124 */
4125kmp_info_t *
4126__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4127{
4128 kmp_team_t *serial_team;
4129 kmp_info_t *new_thr;
4130 int new_gtid;
4131
4132 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4133 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004134#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004135 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004136#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004137 KMP_MB();
4138
4139 /* first, try to get one from the thread pool */
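    /* The pool is a singly linked list threaded through th.th_next_pool; reuse the
       thread at its head. */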
4140 if ( __kmp_thread_pool ) {
4141
4142 new_thr = (kmp_info_t*)__kmp_thread_pool;
4143 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4144 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4145 __kmp_thread_pool_insert_pt = NULL;
4146 }
4147 TCW_4(new_thr->th.th_in_pool, FALSE);
4148 //
4149 // Don't touch th_active_in_pool or th_active.
4150 // The worker thread adjusts those flags as it sleeps/awakens.
4151 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004152 __kmp_thread_pool_nth--;
4153
4154 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4155 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004156 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004157 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4158 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4159
4160 /* setup the thread structure */
4161 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4162 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4163
4164 TCW_4(__kmp_nth, __kmp_nth + 1);
4165
Jonathan Peyton54127982015-11-04 21:37:48 +00004166 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004167 new_thr->th.th_task_state_top = 0;
4168 new_thr->th.th_task_state_stack_sz = 4;
4169
Jim Cownie5e8470a2013-09-27 10:38:44 +00004170#ifdef KMP_ADJUST_BLOCKTIME
4171 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004172 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004173 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4174 if ( __kmp_nth > __kmp_avail_proc ) {
4175 __kmp_zero_bt = TRUE;
4176 }
4177 }
4178#endif /* KMP_ADJUST_BLOCKTIME */
4179
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004180#if KMP_DEBUG
4181 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
4182 int b;
4183 kmp_balign_t * balign = new_thr->th.th_bar;
4184 for( b = 0; b < bs_last_barrier; ++ b )
4185 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4186#endif
4187
Jim Cownie5e8470a2013-09-27 10:38:44 +00004188 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4189 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4190
4191 KMP_MB();
4192 return new_thr;
4193 }
4194
4195
4196 /* no, we'll fork a new one */
4197 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4198 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4199
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004200#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00004201 //
4202 // If this is the first worker thread the RTL is creating, then also
4203 // launch the monitor thread. We try to do this as early as possible.
4204 //
4205 if ( ! TCR_4( __kmp_init_monitor ) ) {
4206 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4207 if ( ! TCR_4( __kmp_init_monitor ) ) {
4208 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4209 TCW_4( __kmp_init_monitor, 1 );
4210 __kmp_create_monitor( & __kmp_monitor );
4211 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004212 #if KMP_OS_WINDOWS
4213 // AC: wait until monitor has started. This is a fix for CQ232808.
4214 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
4215 // work in between, then there is high probability that monitor thread started after
4216 // the library shutdown. At shutdown it is too late to cope with the problem, because
4217 // when the master is in DllMain (process detach) the monitor has no chances to start
4218 // (it is blocked), and master has no means to inform the monitor that the library has gone,
4219 // because all the memory which the monitor can access is going to be released/reset.
4220 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4221 KMP_YIELD( TRUE );
4222 }
4223 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4224 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004225 }
4226 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4227 }
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004228#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004229
4230 KMP_MB();
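    /* Scan for the lowest free gtid slot; slot 0 is reserved for the initial root
       thread, so the search starts at 1. The capacity assert above guarantees that
       a free slot exists. */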
4231 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4232 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4233 }
4234
4235 /* allocate space for it. */
4236 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4237
4238 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4239
4240 if ( __kmp_storage_map ) {
4241 __kmp_print_thread_storage_map( new_thr, new_gtid );
4242 }
4243
4244 /* add the reserve serialized team, initialized from the team's master thread */
4245 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004246 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004247 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004248
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004249 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004250 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004251#if OMPT_SUPPORT
4252 0, // root parallel id
4253#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004254#if OMP_40_ENABLED
4255 proc_bind_default,
4256#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004257 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004258 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004259 }
4260 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004261 serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
4262 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004263 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4264 new_thr ) );
4265
4266 /* setup the thread structures */
4267 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4268
4269 #if USE_FAST_MEMORY
4270 __kmp_initialize_fast_memory( new_thr );
4271 #endif /* USE_FAST_MEMORY */
4272
4273 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004274 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004275 __kmp_initialize_bget( new_thr );
4276 #endif
4277
4278 __kmp_init_random( new_thr ); // Initialize random number generator
4279
4280 /* Initialize these only once when thread is grabbed for a team allocation */
4281 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4282 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4283
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004284 int b;
4285 kmp_balign_t * balign = new_thr->th.th_bar;
4286 for(b=0; b<bs_last_barrier; ++b) {
4287 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4288 balign[b].bb.team = NULL;
4289 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4290 balign[b].bb.use_oncore_barrier = 0;
4291 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004292
4293 new_thr->th.th_spin_here = FALSE;
4294 new_thr->th.th_next_waiting = 0;
4295
Alp Toker98758b02014-03-02 04:12:06 +00004296#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004297 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4298 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4299 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4300 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4301#endif
4302
4303 TCW_4(new_thr->th.th_in_pool, FALSE);
4304 new_thr->th.th_active_in_pool = FALSE;
4305 TCW_4(new_thr->th.th_active, TRUE);
4306
4307 /* adjust the global counters */
4308 __kmp_all_nth ++;
4309 __kmp_nth ++;
4310
4311 //
4312 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4313 // for low numbers of procs, and method #2 (keyed API call) for higher
4314 // numbers of procs.
4315 //
4316 if ( __kmp_adjust_gtid_mode ) {
4317 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4318 if ( TCR_4(__kmp_gtid_mode) != 2) {
4319 TCW_4(__kmp_gtid_mode, 2);
4320 }
4321 }
4322 else {
4323 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4324 TCW_4(__kmp_gtid_mode, 1);
4325 }
4326 }
4327 }
4328
4329#ifdef KMP_ADJUST_BLOCKTIME
4330 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004331 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004332 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4333 if ( __kmp_nth > __kmp_avail_proc ) {
4334 __kmp_zero_bt = TRUE;
4335 }
4336 }
4337#endif /* KMP_ADJUST_BLOCKTIME */
4338
4339 /* actually fork it and create the new worker thread */
4340 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4341 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4342 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4343
Jim Cownie5e8470a2013-09-27 10:38:44 +00004344 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4345 KMP_MB();
4346 return new_thr;
4347}
4348
4349/*
4350 * reinitialize team for reuse.
4351 *
4352 * The hot team code calls this routine at every fork barrier, so the EPCC barrier
4353 * tests are extremely sensitive to changes in it, esp. writes to the team
4354 * struct, which cause a cache invalidation in all threads.
4355 *
4356 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4357 */
4358static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004359__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004360 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4361 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004362 KMP_DEBUG_ASSERT( team && new_icvs);
4363 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
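    // KMP_CHECK_UPDATE only writes when the value actually changes, which keeps this
    // hot path from dirtying the team's cache lines needlessly (see the EPCC note above).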
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004364 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004365
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004366 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004367
Jim Cownie181b4bb2013-12-23 17:28:57 +00004368 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004369 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004370 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004371
4372 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4373 team->t.t_threads[0], team ) );
4374}
4375
Jim Cownie5e8470a2013-09-27 10:38:44 +00004376
4377/* initialize the team data structure
4378 * this assumes the t_threads and t_max_nproc are already set
4379 * also, we don't touch the arguments */
4380static void
4381__kmp_initialize_team(
4382 kmp_team_t * team,
4383 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004384 kmp_internal_control_t * new_icvs,
4385 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004386) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004387 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4388
Jim Cownie5e8470a2013-09-27 10:38:44 +00004389 /* verify */
4390 KMP_DEBUG_ASSERT( team );
4391 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4392 KMP_DEBUG_ASSERT( team->t.t_threads );
4393 KMP_MB();
4394
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004395 team->t.t_master_tid = 0; /* not needed */
4396 /* team->t.t_master_bar; not needed */
4397 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4398 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004399
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004400 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4401 team->t.t_next_pool = NULL;
4402 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004403
4404 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004405 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004406
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004407 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4408 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004409
4410#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004411 team->t.t_fp_control_saved = FALSE; /* not needed */
4412 team->t.t_x87_fpu_control_word = 0; /* not needed */
4413 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004414#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4415
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004416 team->t.t_construct = 0;
4417 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004418
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004419 team->t.t_ordered .dt.t_value = 0;
4420 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004421
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004422 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004423
4424#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004425 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004426#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004427 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004428
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004429 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004430
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004431 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004432
4433 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004434 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004435}
4436
Alp Toker98758b02014-03-02 04:12:06 +00004437#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004438/* Sets full mask for thread and returns old mask, no changes to structures. */
4439static void
4440__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4441{
4442 if ( KMP_AFFINITY_CAPABLE() ) {
4443 int status;
4444 if ( old_mask != NULL ) {
4445 status = __kmp_get_system_affinity( old_mask, TRUE );
4446 int error = errno;
4447 if ( status != 0 ) {
4448 __kmp_msg(
4449 kmp_ms_fatal,
4450 KMP_MSG( ChangeThreadAffMaskError ),
4451 KMP_ERR( error ),
4452 __kmp_msg_null
4453 );
4454 }
4455 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004456 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004457 }
4458}
4459#endif
4460
Alp Toker98758b02014-03-02 04:12:06 +00004461#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004462
4463//
4464// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4465// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004466// thread's partition, and binds each worker to a thread in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004467// The master thread's partition should already include its current binding.
4468//
4469static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004470__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004471{
4472 //
4473 // Copy the master thread's place partition to the team struct
4474 //
4475 kmp_info_t *master_th = team->t.t_threads[0];
4476 KMP_DEBUG_ASSERT( master_th != NULL );
4477 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4478 int first_place = master_th->th.th_first_place;
4479 int last_place = master_th->th.th_last_place;
4480 int masters_place = master_th->th.th_current_place;
4481 team->t.t_first_place = first_place;
4482 team->t.t_last_place = last_place;
4483
4484 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4485 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4486 masters_place, first_place, last_place ) );
4487
4488 switch ( proc_bind ) {
4489
4490 case proc_bind_default:
4491 //
4492 // serial teams might have the proc_bind policy set to
4493 // proc_bind_default. It doesn't matter, as we don't
4494 // rebind the master thread for any proc_bind policy.
4495 //
4496 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4497 break;
4498
4499 case proc_bind_master:
4500 {
4501 int f;
4502 int n_th = team->t.t_nproc;
4503 for ( f = 1; f < n_th; f++ ) {
4504 kmp_info_t *th = team->t.t_threads[f];
4505 KMP_DEBUG_ASSERT( th != NULL );
4506 th->th.th_first_place = first_place;
4507 th->th.th_last_place = last_place;
4508 th->th.th_new_place = masters_place;
4509
4510 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4511 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4512 team->t.t_id, f, masters_place, first_place, last_place ) );
4513 }
4514 }
4515 break;
4516
4517 case proc_bind_close:
4518 {
4519 int f;
4520 int n_th = team->t.t_nproc;
4521 int n_places;
4522 if ( first_place <= last_place ) {
4523 n_places = last_place - first_place + 1;
4524 }
4525 else {
4526 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4527 }
4528 if ( n_th <= n_places ) {
4529 int place = masters_place;
4530 for ( f = 1; f < n_th; f++ ) {
4531 kmp_info_t *th = team->t.t_threads[f];
4532 KMP_DEBUG_ASSERT( th != NULL );
4533
4534 if ( place == last_place ) {
4535 place = first_place;
4536 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004537 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004538 place = 0;
4539 }
4540 else {
4541 place++;
4542 }
4543 th->th.th_first_place = first_place;
4544 th->th.th_last_place = last_place;
4545 th->th.th_new_place = place;
4546
4547 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4548 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4549 team->t.t_id, f, place, first_place, last_place ) );
4550 }
4551 }
4552 else {
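            // More threads than places: each place receives S = n_th/n_places threads,
            // and the rem = n_th%n_places leftover threads are handed out one extra per
            // 'gap' places, starting from the master's place. For example, 7 threads
            // over 3 places are laid out as 3, 2 and 2 threads per place.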
4553 int S, rem, gap, s_count;
4554 S = n_th / n_places;
4555 s_count = 0;
4556 rem = n_th - ( S * n_places );
4557 gap = rem > 0 ? n_places/rem : n_places;
4558 int place = masters_place;
4559 int gap_ct = gap;
4560 for ( f = 0; f < n_th; f++ ) {
4561 kmp_info_t *th = team->t.t_threads[f];
4562 KMP_DEBUG_ASSERT( th != NULL );
4563
4564 th->th.th_first_place = first_place;
4565 th->th.th_last_place = last_place;
4566 th->th.th_new_place = place;
4567 s_count++;
4568
4569 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4570 // do nothing, add an extra thread to place on next iteration
4571 }
4572 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4573 // we added an extra thread to this place; move to next place
4574 if ( place == last_place ) {
4575 place = first_place;
4576 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004577 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004578 place = 0;
4579 }
4580 else {
4581 place++;
4582 }
4583 s_count = 0;
4584 gap_ct = 1;
4585 rem--;
4586 }
4587 else if (s_count == S) { // place full; don't add extra
4588 if ( place == last_place ) {
4589 place = first_place;
4590 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004591 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004592 place = 0;
4593 }
4594 else {
4595 place++;
4596 }
4597 gap_ct++;
4598 s_count = 0;
4599 }
4600
4601 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4602 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4603 team->t.t_id, f, th->th.th_new_place, first_place,
4604 last_place ) );
4605 }
4606 KMP_DEBUG_ASSERT( place == masters_place );
4607 }
4608 }
4609 break;
4610
4611 case proc_bind_spread:
4612 {
4613 int f;
4614 int n_th = team->t.t_nproc;
4615 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004616 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004617 if ( first_place <= last_place ) {
4618 n_places = last_place - first_place + 1;
4619 }
4620 else {
4621 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4622 }
4623 if ( n_th <= n_places ) {
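            // Fewer threads than places: each thread receives a contiguous sub-partition
            // of S = n_places/n_th places (some threads get one extra place when n_places
            // is not a multiple of n_th) and is bound to the first place of that block.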
4624 int place = masters_place;
4625 int S = n_places/n_th;
4626 int s_count, rem, gap, gap_ct;
4627 rem = n_places - n_th*S;
4628 gap = rem ? n_th/rem : 1;
4629 gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004630 thidx = n_th;
4631 if (update_master_only == 1)
4632 thidx = 1;
4633 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004634 kmp_info_t *th = team->t.t_threads[f];
4635 KMP_DEBUG_ASSERT( th != NULL );
4636
4637 th->th.th_first_place = place;
4638 th->th.th_new_place = place;
4639 s_count = 1;
4640 while (s_count < S) {
4641 if ( place == last_place ) {
4642 place = first_place;
4643 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004644 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004645 place = 0;
4646 }
4647 else {
4648 place++;
4649 }
4650 s_count++;
4651 }
4652 if (rem && (gap_ct == gap)) {
4653 if ( place == last_place ) {
4654 place = first_place;
4655 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004656 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004657 place = 0;
4658 }
4659 else {
4660 place++;
4661 }
4662 rem--;
4663 gap_ct = 0;
4664 }
4665 th->th.th_last_place = place;
4666 gap_ct++;
4667
4668 if ( place == last_place ) {
4669 place = first_place;
4670 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004671 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004672 place = 0;
4673 }
4674 else {
4675 place++;
4676 }
4677
4678 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4679 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4680 team->t.t_id, f, th->th.th_new_place,
4681 th->th.th_first_place, th->th.th_last_place ) );
4682 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004683 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004684 }
4685 else {
4686 int S, rem, gap, s_count;
4687 S = n_th / n_places;
4688 s_count = 0;
4689 rem = n_th - ( S * n_places );
4690 gap = rem > 0 ? n_places/rem : n_places;
4691 int place = masters_place;
4692 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004693 thidx = n_th;
4694 if (update_master_only == 1)
4695 thidx = 1;
4696 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004697 kmp_info_t *th = team->t.t_threads[f];
4698 KMP_DEBUG_ASSERT( th != NULL );
4699
4700 th->th.th_first_place = place;
4701 th->th.th_last_place = place;
4702 th->th.th_new_place = place;
4703 s_count++;
4704
4705 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4706 // do nothing, add an extra thread to place on next iteration
4707 }
4708 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4709 // we added an extra thread to this place; move on to next place
4710 if ( place == last_place ) {
4711 place = first_place;
4712 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004713 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004714 place = 0;
4715 }
4716 else {
4717 place++;
4718 }
4719 s_count = 0;
4720 gap_ct = 1;
4721 rem--;
4722 }
4723 else if (s_count == S) { // place is full; don't add extra thread
4724 if ( place == last_place ) {
4725 place = first_place;
4726 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004727 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004728 place = 0;
4729 }
4730 else {
4731 place++;
4732 }
4733 gap_ct++;
4734 s_count = 0;
4735 }
4736
4737 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4738 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4739 team->t.t_id, f, th->th.th_new_place,
4740 th->th.th_first_place, th->th.th_last_place) );
4741 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004742 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004743 }
4744 }
4745 break;
4746
4747 default:
4748 break;
4749 }
4750
4751 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4752}
4753
Alp Toker98758b02014-03-02 04:12:06 +00004754#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004755
4756/* allocate a new team data structure to use. take one off of the free pool if available */
4757kmp_team_t *
4758__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004759#if OMPT_SUPPORT
4760 ompt_parallel_id_t ompt_parallel_id,
4761#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004762#if OMP_40_ENABLED
4763 kmp_proc_bind_t new_proc_bind,
4764#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004765 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004766 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004767{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00004768 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004769 int f;
4770 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004771 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004772 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004773
4774 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4775 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4776 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4777 KMP_MB();
4778
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004779#if KMP_NESTED_HOT_TEAMS
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004780 kmp_hot_team_ptr_t *hot_teams;
4781 if( master ) {
4782 team = master->th.th_team;
4783 level = team->t.t_active_level;
4784 if( master->th.th_teams_microtask ) { // in teams construct?
4785 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4786 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4787 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
4788 ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise
4789 }
4790 }
4791 hot_teams = master->th.th_hot_teams;
4792 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4793 { // hot team has already been allocated for given level
4794 use_hot_team = 1;
4795 } else {
4796 use_hot_team = 0;
4797 }
4798 }
4799#endif
4800 // Optimization to use a "hot" team
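    // A "hot" team is kept alive between parallel regions of the same root so its
    // threads and per-team data structures can be reused without reallocation.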
4801 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004802 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004803#if KMP_NESTED_HOT_TEAMS
4804 team = hot_teams[level].hot_team;
4805#else
4806 team = root->r.r_hot_team;
4807#endif
4808#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004809 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004810 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4811 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004812 }
4813#endif
4814
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004815 // Has the number of threads changed?
4816 /* Let's assume the most common case is that the number of threads is unchanged, and
4817 put that case first. */
4818 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4819 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004820 // This case can mean that omp_set_num_threads() was called and the hot team size
4821 // was already reduced, so we check the special flag
4822 if ( team->t.t_size_changed == -1 ) {
4823 team->t.t_size_changed = 1;
4824 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004825 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004826 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004827
4828 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004829 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004830 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4831 team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004832 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004833
4834 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4835
4836 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4837 0, team->t.t_threads[0], team ) );
4838 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4839
4840#if OMP_40_ENABLED
4841# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004842 if ( ( team->t.t_size_changed == 0 )
4843 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004844 if (new_proc_bind == proc_bind_spread) {
4845 __kmp_partition_places(team, 1); // add flag to update only master for spread
4846 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004847 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4848 team->t.t_id, new_proc_bind, team->t.t_first_place,
4849 team->t.t_last_place ) );
4850 }
4851 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004852 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004853 __kmp_partition_places( team );
4854 }
4855# else
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004856 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004857# endif /* KMP_AFFINITY_SUPPORTED */
4858#endif /* OMP_40_ENABLED */
4859 }
4860 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004861 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4862
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004863 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004864#if KMP_NESTED_HOT_TEAMS
4865 if( __kmp_hot_teams_mode == 0 ) {
4866 // AC: saved number of threads should correspond to team's value in this mode,
4867 // can be bigger in mode 1, when hot team has some threads in reserve
4868 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4869 hot_teams[level].hot_team_nth = new_nproc;
4870#endif // KMP_NESTED_HOT_TEAMS
4871 /* release the extra threads we don't need any more */
4872 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4873 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004874 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4875 // When decreasing team size, threads no longer in the team should unref task team.
4876 team->t.t_threads[f]->th.th_task_team = NULL;
4877 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004878 __kmp_free_thread( team->t.t_threads[ f ] );
4879 team->t.t_threads[ f ] = NULL;
4880 }
4881#if KMP_NESTED_HOT_TEAMS
4882 } // (__kmp_hot_teams_mode == 0)
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004883 else {
4884 // When keeping extra threads in team, switch threads to wait on own b_go flag
4885 for (f=new_nproc; f<team->t.t_nproc; ++f) {
4886 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4887 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4888 for (int b=0; b<bs_last_barrier; ++b) {
4889 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4890 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4891 }
4892 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4893 }
4894 }
4895 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004896#endif // KMP_NESTED_HOT_TEAMS
4897 team->t.t_nproc = new_nproc;
4898 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004899 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4900 team->t.t_sched.chunk != new_icvs->sched.chunk)
4901 team->t.t_sched = new_icvs->sched;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004902 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004903
Jim Cownie5e8470a2013-09-27 10:38:44 +00004904 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004905 for(f = 0; f < new_nproc; ++f) {
4906 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004907 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004908 // restore the current task state of the master thread: should be the implicit task
4909 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4910 0, team->t.t_threads[0], team ) );
4911
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004912 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004913
4914#ifdef KMP_DEBUG
4915 for ( f = 0; f < team->t.t_nproc; f++ ) {
4916 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4917 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4918 }
4919#endif
4920
4921#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004922 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00004923# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004924 __kmp_partition_places( team );
4925# endif
4926#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004927 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004928 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004929#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004930 kmp_affin_mask_t *old_mask;
4931 if ( KMP_AFFINITY_CAPABLE() ) {
4932 KMP_CPU_ALLOC(old_mask);
4933 }
4934#endif
4935
4936 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4937
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004938 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004939
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004940#if KMP_NESTED_HOT_TEAMS
4941 int avail_threads = hot_teams[level].hot_team_nth;
4942 if( new_nproc < avail_threads )
4943 avail_threads = new_nproc;
4944 kmp_info_t **other_threads = team->t.t_threads;
4945 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4946 // Adjust barrier data of reserved threads (if any) of the team
4947 // Other data will be set in __kmp_initialize_info() below.
4948 int b;
4949 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4950 for ( b = 0; b < bs_last_barrier; ++ b ) {
4951 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4952 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004953#if USE_DEBUGGER
4954 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4955#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004956 }
4957 }
4958 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4959 // we have all needed threads in reserve, no need to allocate any
4960 // this is only possible in mode 1; mode 0 cannot have reserved threads
4961 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4962 team->t.t_nproc = new_nproc; // just get reserved threads involved
4963 } else {
4964 // we may have some threads in reserve, but not enough
4965 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4966 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4967#endif // KMP_NESTED_HOT_TEAMS
4968 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004969 /* reallocate larger arrays */
4970 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004971 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004972 }
4973
Alp Toker98758b02014-03-02 04:12:06 +00004974#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004975 /* Temporarily set full mask for master thread before
4976 creation of workers. The reason is that workers inherit
4977 the affinity from master, so if a lot of workers are
4978 created on a single core quickly, they don't get
4979 a chance to set their own affinity for a long time.
4980 */
4981 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4982#endif
4983
4984 /* allocate new threads for the hot team */
4985 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4986 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4987 KMP_DEBUG_ASSERT( new_worker );
4988 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004989
Jonathan Peytond26e2132015-09-10 18:44:30 +00004990 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004991 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4992 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4993 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4994
4995 { // Initialize barrier data for new threads.
4996 int b;
4997 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004998 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004999 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005000 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005001#if USE_DEBUGGER
5002 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5003#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005004 }
5005 }
5006 }
5007
Alp Toker98758b02014-03-02 04:12:06 +00005008#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005009 if ( KMP_AFFINITY_CAPABLE() ) {
5010 /* Restore initial master thread's affinity mask */
5011 __kmp_set_system_affinity( old_mask, TRUE );
5012 KMP_CPU_FREE(old_mask);
5013 }
5014#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005015#if KMP_NESTED_HOT_TEAMS
5016 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5017#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005018        /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00005019 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005020 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005021
Jonathan Peytone03b62f2015-10-08 18:49:40 +00005022 /* reinitialize the threads */
5023 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00005024 for (f=0; f < team->t.t_nproc; ++f)
5025 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5026 if (level) { // set th_task_state for new threads in nested hot team
5027 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00005028 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00005029 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
5030 for (f=old_nproc; f < team->t.t_nproc; ++f)
5031 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005032 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005033 else { // set th_task_state for new threads in non-nested hot team
5034 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
5035 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005036 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005037 }
5038
Jim Cownie5e8470a2013-09-27 10:38:44 +00005039#ifdef KMP_DEBUG
5040 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5041 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5042 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5043 }
5044#endif
5045
5046#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00005047 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00005048# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005049 __kmp_partition_places( team );
5050# endif
5051#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005052 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005053
5054#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005055 kmp_info_t *master = team->t.t_threads[0];
5056 if( master->th.th_teams_microtask ) {
5057 for( f = 1; f < new_nproc; ++f ) {
5058 // propagate teams construct specific info to workers
5059 kmp_info_t *thr = team->t.t_threads[f];
5060 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5061 thr->th.th_teams_level = master->th.th_teams_level;
5062 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005063 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005064 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005065#endif /* OMP_40_ENABLED */
5066#if KMP_NESTED_HOT_TEAMS
5067 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005068 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005069 for( f = 1; f < new_nproc; ++f ) {
5070 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005071 int b;
5072 kmp_balign_t * balign = thr->th.th_bar;
5073 for( b = 0; b < bs_last_barrier; ++ b ) {
5074 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5075 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005076#if USE_DEBUGGER
5077 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5078#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005079 }
5080 }
5081 }
5082#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005083
5084 /* reallocate space for arguments if necessary */
5085 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005086 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005087 //
5088 // The hot team re-uses the previous task team,
5089 // if untouched during the previous release->gather phase.
5090 //
5091
5092 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5093
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005094#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005095 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005096 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5097 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005098 }
5099#endif
5100
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005101#if OMPT_SUPPORT
5102 __ompt_team_assign_id(team, ompt_parallel_id);
5103#endif
5104
Jim Cownie5e8470a2013-09-27 10:38:44 +00005105 KMP_MB();
5106
5107 return team;
5108 }
5109
5110 /* next, let's try to take one from the team pool */
5111 KMP_MB();
5112 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5113 {
5114 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5115 if ( team->t.t_max_nproc >= max_nproc ) {
5116 /* take this team from the team pool */
5117 __kmp_team_pool = team->t.t_next_pool;
5118
5119 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005120 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005121
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005122 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5123 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5124 team->t.t_task_team[0] = NULL;
5125 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005126
5127 /* reallocate space for arguments if necessary */
5128 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005129 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005130
5131 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5132 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5133 { // Initialize barrier data.
5134 int b;
5135 for ( b = 0; b < bs_last_barrier; ++ b) {
5136 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005137#if USE_DEBUGGER
5138 team->t.t_bar[ b ].b_master_arrived = 0;
5139 team->t.t_bar[ b ].b_team_arrived = 0;
5140#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005141 }
5142 }
5143
5144#if OMP_40_ENABLED
5145 team->t.t_proc_bind = new_proc_bind;
5146#endif
5147
5148 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005149
5150#if OMPT_SUPPORT
5151 __ompt_team_assign_id(team, ompt_parallel_id);
5152#endif
5153
Jim Cownie5e8470a2013-09-27 10:38:44 +00005154 KMP_MB();
5155
5156 return team;
5157 }
5158
5159 /* reap team if it is too small, then loop back and check the next one */
5160        /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
5161 /* TODO: Use technique to find the right size hot-team, don't reap them */
5162 team = __kmp_reap_team( team );
5163 __kmp_team_pool = team;
5164 }
5165
5166 /* nothing available in the pool, no matter, make a new team! */
5167 KMP_MB();
5168 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5169
5170 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005171 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005172 /* NOTE well, for some reason allocating one big buffer and dividing it
5173     * up seems to really hurt performance a lot on the P4, so let's not use
5174 * this... */
5175 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005176
5177 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005178 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005179
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005180 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5181 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5182 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5183 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005184
5185 if ( __kmp_storage_map ) {
5186 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5187 }
5188
5189 /* allocate space for arguments */
5190 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005191 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005192
5193 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5194 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5195 { // Initialize barrier data.
5196 int b;
5197 for ( b = 0; b < bs_last_barrier; ++ b ) {
5198 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005199#if USE_DEBUGGER
5200 team->t.t_bar[ b ].b_master_arrived = 0;
5201 team->t.t_bar[ b ].b_team_arrived = 0;
5202#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005203 }
5204 }
5205
5206#if OMP_40_ENABLED
5207 team->t.t_proc_bind = new_proc_bind;
5208#endif
5209
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005210#if OMPT_SUPPORT
5211 __ompt_team_assign_id(team, ompt_parallel_id);
5212 team->t.ompt_serialized_team_info = NULL;
5213#endif
5214
Jim Cownie5e8470a2013-09-27 10:38:44 +00005215 KMP_MB();
5216
5217 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5218
5219 return team;
5220}
5221
5222/* TODO implement hot-teams at all levels */
5223/* TODO implement lazy thread release on demand (disband request) */
5224
5225/* free the team. return it to the team pool. release all the threads
5226 * associated with it */
5227void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005228__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005229{
5230 int f;
5231 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5232
5233 /* verify state */
5234 KMP_DEBUG_ASSERT( root );
5235 KMP_DEBUG_ASSERT( team );
5236 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5237 KMP_DEBUG_ASSERT( team->t.t_threads );
5238
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005239 int use_hot_team = team == root->r.r_hot_team;
5240#if KMP_NESTED_HOT_TEAMS
5241 int level;
5242 kmp_hot_team_ptr_t *hot_teams;
5243 if( master ) {
5244 level = team->t.t_active_level - 1;
5245 if( master->th.th_teams_microtask ) { // in teams construct?
5246 if( master->th.th_teams_size.nteams > 1 ) {
5247 ++level; // level was not increased in teams construct for team_of_masters
5248 }
5249 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5250 master->th.th_teams_level == team->t.t_level ) {
5251 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5252 } // team->t.t_level will be increased inside parallel
5253 }
5254 hot_teams = master->th.th_hot_teams;
5255 if( level < __kmp_hot_teams_max_level ) {
5256 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5257 use_hot_team = 1;
5258 }
5259 }
5260#endif // KMP_NESTED_HOT_TEAMS
5261
Jim Cownie5e8470a2013-09-27 10:38:44 +00005262 /* team is done working */
5263 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005264 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005265 // Do not reset pointer to parent team to NULL for hot teams.
5266
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005267 /* if we are non-hot team, release our threads */
5268 if( ! use_hot_team ) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005269 if (__kmp_tasking_mode != tskm_immediate_exec) {
5270 // Wait for threads to reach reapable state
5271 for (f = 1; f < team->t.t_nproc; ++f) {
5272 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
Andrey Churbanov435b419d2017-03-21 13:48:52 +00005273 kmp_info_t *th = team->t.t_threads[f];
5274 volatile kmp_uint32 *state = &th->th.th_reap_state;
Andrey Churbanov581490e2017-02-06 18:53:32 +00005275 while (*state != KMP_SAFE_TO_REAP) {
5276#if KMP_OS_WINDOWS
5277 // On Windows a thread can be killed at any time, check this
5278 DWORD ecode;
Andrey Churbanov435b419d2017-03-21 13:48:52 +00005279 if (!__kmp_is_thread_alive(th, &ecode)) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005280 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
Andrey Churbanov435b419d2017-03-21 13:48:52 +00005281 break;
5282 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005283#endif
Andrey Churbanov435b419d2017-03-21 13:48:52 +00005284 // first check if thread is sleeping
5285 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5286 if (fl.is_sleeping())
5287 fl.resume(__kmp_gtid_from_thread(th));
5288 KMP_CPU_PAUSE();
Andrey Churbanov581490e2017-02-06 18:53:32 +00005289 }
5290 }
5291
Jonathan Peyton54127982015-11-04 21:37:48 +00005292 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005293 int tt_idx;
5294 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005295 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5296 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005297 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5298 team->t.t_threads[f]->th.th_task_team = NULL;
5299 }
5300 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005301#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005302 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005303#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005304 team->t.t_task_team[tt_idx] = NULL;
5305 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005306 }
5307 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005308
5309 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005310 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005311 team->t.t_level = 0;
5312 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005313
Jim Cownie5e8470a2013-09-27 10:38:44 +00005314 /* free the worker threads */
5315 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5316 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5317 __kmp_free_thread( team->t.t_threads[ f ] );
5318 team->t.t_threads[ f ] = NULL;
5319 }
5320
Jim Cownie5e8470a2013-09-27 10:38:44 +00005321 /* put the team back in the team pool */
5322 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005323 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005324 __kmp_team_pool = (volatile kmp_team_t*) team;
5325 }
5326
5327 KMP_MB();
5328}
5329
5330
5331/* reap the team. destroy it, reclaim all its resources and free its memory */
5332kmp_team_t *
5333__kmp_reap_team( kmp_team_t *team )
5334{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005335 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005336
5337 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005338 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5339 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5340 KMP_DEBUG_ASSERT( team->t.t_threads );
5341 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005342
5343 /* TODO clean the threads that are a part of this? */
5344
5345 /* free stuff */
5346
5347 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005348 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5349 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005350 __kmp_free( team );
5351
5352 KMP_MB();
5353 return next_pool;
5354}
5355
5356//
5357// Free the thread. Don't reap it, just place it on the pool of available
5358// threads.
5359//
5360// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5361// binding for the affinity mechanism to be useful.
5362//
5363// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5364// However, we want to avoid a potential performance problem by always
5365// scanning through the list to find the correct point at which to insert
5366// the thread (potential N**2 behavior). To do this we keep track of the
5367// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5368// With single-level parallelism, threads will always be added to the tail
5369// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5370// parallelism, all bets are off and we may need to scan through the entire
5371// free list.
5372//
5373// This change also has a potentially large performance benefit, for some
5374// applications. Previously, as threads were freed from the hot team, they
5375// would be placed back on the free list in inverse order. If the hot team
5376// grew back to its original size, then the freed thread would be placed
5377// back on the hot team in reverse order. This could cause bad cache
5378// locality problems on programs where the size of the hot team regularly
5379// grew and shrunk.
5380//
5381// Now, for single-level parallelism, the OMP tid is alway == gtid.
5382//
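//
// Illustrative walk-through of the insertion logic below (the gtid values are
// made up for the example): with threads of gtid 2 -> 3 -> 5 already in the
// pool and __kmp_thread_pool_insert_pt pointing at gtid 3, freeing gtid 4
// starts the scan at 3's next link, stops at gtid 5 (since 5 >= 4), links the
// freed thread in to give 2 -> 3 -> 4 -> 5, and leaves
// __kmp_thread_pool_insert_pt pointing at the newly inserted gtid 4.
//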
5383void
5384__kmp_free_thread( kmp_info_t *this_th )
5385{
5386 int gtid;
5387 kmp_info_t **scan;
5388
5389 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5390 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5391
5392 KMP_DEBUG_ASSERT( this_th );
5393
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005394 // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team).
5395 int b;
5396 kmp_balign_t *balign = this_th->th.th_bar;
5397 for (b=0; b<bs_last_barrier; ++b) {
5398 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5399 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5400 balign[b].bb.team = NULL;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005401 balign[b].bb.leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005402 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005403 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005404
Jim Cownie5e8470a2013-09-27 10:38:44 +00005405 /* put thread back on the free pool */
5406 TCW_PTR(this_th->th.th_team, NULL);
5407 TCW_PTR(this_th->th.th_root, NULL);
5408 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5409
5410 //
5411 // If the __kmp_thread_pool_insert_pt is already past the new insert
5412 // point, then we need to re-scan the entire list.
5413 //
5414 gtid = this_th->th.th_info.ds.ds_gtid;
5415 if ( __kmp_thread_pool_insert_pt != NULL ) {
5416 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5417 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5418 __kmp_thread_pool_insert_pt = NULL;
5419 }
5420 }
5421
5422 //
5423 // Scan down the list to find the place to insert the thread.
5424 // scan is the address of a link in the list, possibly the address of
5425 // __kmp_thread_pool itself.
5426 //
5427    // In the absence of nested parallelism, the for loop will have 0 iterations.
5428 //
5429 if ( __kmp_thread_pool_insert_pt != NULL ) {
5430 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5431 }
5432 else {
5433 scan = (kmp_info_t **)&__kmp_thread_pool;
5434 }
5435 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5436 scan = &( (*scan)->th.th_next_pool ) );
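    // Note: the loop above has an intentionally empty body; the entire walk is
    // done in the loop header, leaving 'scan' at the link where the new thread
    // belongs.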
5437
5438 //
5439 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5440 // to its address.
5441 //
5442 TCW_PTR(this_th->th.th_next_pool, *scan);
5443 __kmp_thread_pool_insert_pt = *scan = this_th;
5444 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5445 || ( this_th->th.th_info.ds.ds_gtid
5446 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5447 TCW_4(this_th->th.th_in_pool, TRUE);
5448 __kmp_thread_pool_nth++;
5449
5450 TCW_4(__kmp_nth, __kmp_nth - 1);
5451
5452#ifdef KMP_ADJUST_BLOCKTIME
5453 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005454 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005455 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5456 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5457 if ( __kmp_nth <= __kmp_avail_proc ) {
5458 __kmp_zero_bt = FALSE;
5459 }
5460 }
5461#endif /* KMP_ADJUST_BLOCKTIME */
5462
5463 KMP_MB();
5464}
5465
Jim Cownie5e8470a2013-09-27 10:38:44 +00005466
Jim Cownie5e8470a2013-09-27 10:38:44 +00005467/* ------------------------------------------------------------------------ */
5468
5469void *
5470__kmp_launch_thread( kmp_info_t *this_thr )
5471{
5472 int gtid = this_thr->th.th_info.ds.ds_gtid;
5473/* void *stack_data;*/
5474 kmp_team_t *(*volatile pteam);
5475
5476 KMP_MB();
5477 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5478
5479 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005480 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005481 }
5482
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005483#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005484 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005485 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5486 this_thr->th.ompt_thread_info.wait_id = 0;
5487 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005488 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005489 __ompt_thread_begin(ompt_thread_worker, gtid);
5490 }
5491 }
5492#endif
5493
Jim Cownie5e8470a2013-09-27 10:38:44 +00005494 /* This is the place where threads wait for work */
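    /* In outline, each worker loops here until global shutdown: it sleeps at the
       fork barrier until a master hands it a team, runs that team's microtask
       through t_invoke(), passes through the join barrier, then comes back and
       waits for the next parallel region, until __kmp_global.g.g_done is set. */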
5495 while( ! TCR_4(__kmp_global.g.g_done) ) {
5496 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5497 KMP_MB();
5498
5499 /* wait for work to do */
5500 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5501
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005502#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005503 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005504 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5505 }
5506#endif
5507
Jim Cownie5e8470a2013-09-27 10:38:44 +00005508 /* No tid yet since not part of a team */
5509 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5510
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005511#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005512 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005513 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5514 }
5515#endif
5516
Jim Cownie5e8470a2013-09-27 10:38:44 +00005517 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5518
5519 /* have we been allocated? */
5520 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005521#if OMPT_SUPPORT
5522 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005523 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005524 if (ompt_enabled) {
5525 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005526 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005527 }
5528#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005529 /* we were just woken up, so run our new task */
5530 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5531 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005532 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5533 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005534
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005535 updateHWFPControl (*pteam);
5536
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005537#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005538 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005539 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005540 // Initialize OMPT task id for implicit task.
5541 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005542 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005543 }
5544#endif
5545
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005546 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005547 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5548 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005549 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005550 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005551 KMP_ASSERT( rc );
5552
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005553#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005554 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005555 /* no frame set while outside task */
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005556 task_info->frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005557
5558 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5559 }
5560#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005561 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005562 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5563 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005564 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005565 /* join barrier after parallel region */
5566 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005567#if OMPT_SUPPORT && OMPT_TRACE
5568 if (ompt_enabled) {
5569 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005570 // don't access *pteam here: it may have already been freed
5571 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005572 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5573 my_parallel_id, task_info->task_id);
5574 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005575 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005576 task_info->task_id = 0;
5577 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005578#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005579 }
5580 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005581 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005582
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005583#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005584 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005585 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5586 __ompt_thread_end(ompt_thread_worker, gtid);
5587 }
5588#endif
5589
Jonathan Peyton54127982015-11-04 21:37:48 +00005590 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005591 /* run the destructors for the threadprivate data for this thread */
5592 __kmp_common_destroy_gtid( gtid );
5593
5594 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5595 KMP_MB();
5596 return this_thr;
5597}
5598
5599/* ------------------------------------------------------------------------ */
5600/* ------------------------------------------------------------------------ */
5601
Jim Cownie5e8470a2013-09-27 10:38:44 +00005602void
5603__kmp_internal_end_dest( void *specific_gtid )
5604{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005605 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005606 #pragma warning( push )
5607 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5608 #endif
5609 // Make sure no significant bits are lost
5610 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005611 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005612 #pragma warning( pop )
5613 #endif
5614
5615 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5616    /* NOTE: the gtid is stored as gtid+1 in thread-local storage;
5617 * this is because 0 is reserved for the nothing-stored case */
5618
5619 /* josh: One reason for setting the gtid specific data even when it is being
5620 destroyed by pthread is to allow gtid lookup through thread specific data
5621 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5622 that gets executed in the call to __kmp_internal_end_thread, actually
5623 gets the gtid through the thread specific data. Setting it here seems
5624 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5625 to run smoothly.
5626 todo: get rid of this after we remove the dependence on
5627 __kmp_gtid_get_specific
5628 */
5629 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5630 __kmp_gtid_set_specific( gtid );
5631 #ifdef KMP_TDATA_GTID
5632 __kmp_gtid = gtid;
5633 #endif
5634 __kmp_internal_end_thread( gtid );
5635}
5636
Jonathan Peyton99016992015-05-26 17:32:53 +00005637#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005638
5639// 2009-09-08 (lev): It looks the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005640// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005641// option in makefile.mk works fine.
5642
5643__attribute__(( destructor ))
5644void
5645__kmp_internal_end_dtor( void )
5646{
5647 __kmp_internal_end_atexit();
5648}
5649
5650void
5651__kmp_internal_end_fini( void )
5652{
5653 __kmp_internal_end_atexit();
5654}
5655
5656#endif
5657
5658/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5659void
5660__kmp_internal_end_atexit( void )
5661{
5662 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5663 /* [Windows]
5664 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5665        josh: ideally, we want to completely shut down the library in this atexit handler, but
5666 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5667 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5668        stat code and use __kmp_internal_end_library to cleanly shut down the library.
5669
5670// TODO: Can some of this comment about GVS be removed?
5671 I suspect that the offending stat code is executed when the calling thread tries to
5672 clean up a dead root thread's data structures, resulting in GVS code trying to close
5673 the GVS structures for that thread, but since the stat code uses
5674 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5675 cleaning up itself instead of another thread, it gets confused. This happens because
5676 allowing a thread to unregister and cleanup another thread is a recent modification for
5677 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5678 thread may end up trying to unregister another thread only if thread death does not
5679 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5680 specific data destructor function to detect thread death. For Windows dynamic, there
5681 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5682 workaround is applicable only for Windows static stat library.
5683 */
5684 __kmp_internal_end_library( -1 );
5685 #if KMP_OS_WINDOWS
5686 __kmp_close_console();
5687 #endif
5688}
5689
5690static void
5691__kmp_reap_thread(
5692 kmp_info_t * thread,
5693 int is_root
5694) {
5695
Alp Toker8f2d3f02014-02-24 10:40:15 +00005696 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005697
5698 int gtid;
5699
5700 KMP_DEBUG_ASSERT( thread != NULL );
5701
5702 gtid = thread->th.th_info.ds.ds_gtid;
5703
5704 if ( ! is_root ) {
5705
5706 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5707 /* Assume the threads are at the fork barrier here */
5708 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5709 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00005710 ANNOTATE_HAPPENS_BEFORE(thread);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005711 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5712 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005713 }; // if
5714
Jim Cownie5e8470a2013-09-27 10:38:44 +00005715 // Terminate OS thread.
5716 __kmp_reap_worker( thread );
5717
5718 //
5719 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005720 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005721 //
5722 // There is a small timing hole here - if the worker thread was
5723        // just waking up after sleeping in the pool, had reset its
5724 // th_active_in_pool flag but not decremented the global counter
5725 // __kmp_thread_pool_active_nth yet, then the global counter
5726 // might not get updated.
5727 //
5728 // Currently, this can only happen as the library is unloaded,
5729 // so there are no harmful side effects.
5730 //
5731 if ( thread->th.th_active_in_pool ) {
5732 thread->th.th_active_in_pool = FALSE;
5733 KMP_TEST_THEN_DEC32(
5734 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5735 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5736 }
5737
5738 // Decrement # of [worker] threads in the pool.
5739 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5740 --__kmp_thread_pool_nth;
5741 }; // if
5742
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005743 __kmp_free_implicit_task(thread);
5744
Jim Cownie5e8470a2013-09-27 10:38:44 +00005745 // Free the fast memory for tasking
5746 #if USE_FAST_MEMORY
5747 __kmp_free_fast_memory( thread );
5748 #endif /* USE_FAST_MEMORY */
5749
5750 __kmp_suspend_uninitialize_thread( thread );
5751
5752 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5753 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5754
5755 -- __kmp_all_nth;
5756 // __kmp_nth was decremented when thread is added to the pool.
5757
5758#ifdef KMP_ADJUST_BLOCKTIME
5759 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005760 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005761 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5762 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5763 if ( __kmp_nth <= __kmp_avail_proc ) {
5764 __kmp_zero_bt = FALSE;
5765 }
5766 }
5767#endif /* KMP_ADJUST_BLOCKTIME */
5768
5769 /* free the memory being used */
5770 if( __kmp_env_consistency_check ) {
5771 if ( thread->th.th_cons ) {
5772 __kmp_free_cons_stack( thread->th.th_cons );
5773 thread->th.th_cons = NULL;
5774 }; // if
5775 }
5776
5777 if ( thread->th.th_pri_common != NULL ) {
5778 __kmp_free( thread->th.th_pri_common );
5779 thread->th.th_pri_common = NULL;
5780 }; // if
5781
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005782 if (thread->th.th_task_state_memo_stack != NULL) {
5783 __kmp_free(thread->th.th_task_state_memo_stack);
5784 thread->th.th_task_state_memo_stack = NULL;
5785 }
5786
Jim Cownie5e8470a2013-09-27 10:38:44 +00005787 #if KMP_USE_BGET
5788 if ( thread->th.th_local.bget_data != NULL ) {
5789 __kmp_finalize_bget( thread );
5790 }; // if
5791 #endif
5792
Alp Toker98758b02014-03-02 04:12:06 +00005793#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005794 if ( thread->th.th_affin_mask != NULL ) {
5795 KMP_CPU_FREE( thread->th.th_affin_mask );
5796 thread->th.th_affin_mask = NULL;
5797 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005798#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005799
5800 __kmp_reap_team( thread->th.th_serial_team );
5801 thread->th.th_serial_team = NULL;
5802 __kmp_free( thread );
5803
5804 KMP_MB();
5805
5806} // __kmp_reap_thread
5807
5808static void
5809__kmp_internal_end(void)
5810{
5811 int i;
5812
5813 /* First, unregister the library */
5814 __kmp_unregister_library();
5815
5816 #if KMP_OS_WINDOWS
5817 /* In Win static library, we can't tell when a root actually dies, so we
5818 reclaim the data structures for any root threads that have died but not
5819 unregistered themselves, in order to shut down cleanly.
5820 In Win dynamic library we also can't tell when a thread dies.
5821 */
5822 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5823 #endif
5824
5825 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5826 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005827 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005828 break;
5829 KMP_MB(); /* Flush all pending memory write invalidates. */
5830 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5831
5832 if ( i < __kmp_threads_capacity ) {
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005833#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005834 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5835 KMP_MB(); /* Flush all pending memory write invalidates. */
5836
5837 //
5838 // Need to check that monitor was initialized before reaping it.
5839 // If we are called form __kmp_atfork_child (which sets
5840 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5841 // contain valid data, but it is only valid in the parent process,
5842 // not the child.
5843 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005844 // New behavior (201008): instead of keying off of the flag
5845 // __kmp_init_parallel, the monitor thread creation is keyed off
5846 // of the new flag __kmp_init_monitor.
5847 //
5848 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5849 if ( TCR_4( __kmp_init_monitor ) ) {
5850 __kmp_reap_monitor( & __kmp_monitor );
5851 TCW_4( __kmp_init_monitor, 0 );
5852 }
5853 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5854 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005855#endif // KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005856 } else {
5857 /* TODO move this to cleanup code */
5858 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005859 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005860 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5861 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005862// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005863 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005864 }
5865 }
5866 #endif
5867
5868 KMP_MB();
5869
5870 // Reap the worker threads.
5871 // This is valid for now, but be careful if threads are reaped sooner.
5872        while ( __kmp_thread_pool != NULL ) {    // Loop through all the threads in the pool.
5873 // Get the next thread from the pool.
5874 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5875 __kmp_thread_pool = thread->th.th_next_pool;
5876 // Reap it.
Andrey Churbanov581490e2017-02-06 18:53:32 +00005877 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005878 thread->th.th_next_pool = NULL;
5879 thread->th.th_in_pool = FALSE;
5880 __kmp_reap_thread( thread, 0 );
5881 }; // while
5882 __kmp_thread_pool_insert_pt = NULL;
5883
5884 // Reap teams.
5885        while ( __kmp_team_pool != NULL ) {     // Loop through all the teams in the pool.
5886 // Get the next team from the pool.
5887 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5888 __kmp_team_pool = team->t.t_next_pool;
5889 // Reap it.
5890 team->t.t_next_pool = NULL;
5891 __kmp_reap_team( team );
5892 }; // while
5893
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005894 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005895
5896 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5897 // TBD: Add some checking...
5898 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5899 }
5900
5901 /* Make sure all threadprivate destructors get run by joining with all worker
5902 threads before resetting this flag */
5903 TCW_SYNC_4(__kmp_init_common, FALSE);
5904
5905 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5906 KMP_MB();
5907
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005908#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005909 //
5910 // See note above: One of the possible fixes for CQ138434 / CQ140126
5911 //
5912 // FIXME: push both code fragments down and CSE them?
5913 // push them into __kmp_cleanup() ?
5914 //
5915 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5916 if ( TCR_4( __kmp_init_monitor ) ) {
5917 __kmp_reap_monitor( & __kmp_monitor );
5918 TCW_4( __kmp_init_monitor, 0 );
5919 }
5920 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5921 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005922#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005923 } /* else !__kmp_global.t_active */
5924 TCW_4(__kmp_init_gtid, FALSE);
5925 KMP_MB(); /* Flush all pending memory write invalidates. */
5926
Jim Cownie5e8470a2013-09-27 10:38:44 +00005927 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005928#if OMPT_SUPPORT
5929 ompt_fini();
5930#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005931}
5932
5933void
5934__kmp_internal_end_library( int gtid_req )
5935{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005936 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5937 /* this shouldn't be a race condition because __kmp_internal_end() is the
5938 * only place to clear __kmp_serial_init */
5939 /* we'll check this later too, after we get the lock */
5940    // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5941 // because the next check will work in any case.
5942 if( __kmp_global.g.g_abort ) {
5943 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5944 /* TODO abort? */
5945 return;
5946 }
5947 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5948 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5949 return;
5950 }
5951
5952
5953 KMP_MB(); /* Flush all pending memory write invalidates. */
5954
5955 /* find out who we are and what we should do */
5956 {
5957 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5958 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5959 if( gtid == KMP_GTID_SHUTDOWN ) {
5960 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5961 return;
5962 } else if( gtid == KMP_GTID_MONITOR ) {
5963 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5964 return;
5965 } else if( gtid == KMP_GTID_DNE ) {
5966 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5967            /* we don't know who we are, but we may still shut down the library */
5968 } else if( KMP_UBER_GTID( gtid )) {
5969 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005970 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005971 __kmp_global.g.g_abort = -1;
5972 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5973 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5974 return;
5975 } else {
5976 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5977 __kmp_unregister_root_current_thread( gtid );
5978 }
5979 } else {
5980 /* worker threads may call this function through the atexit handler, if they call exit() */
5981 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5982 TODO: do a thorough shutdown instead
5983 */
5984 #ifdef DUMP_DEBUG_ON_EXIT
5985 if ( __kmp_debug_buf )
5986 __kmp_dump_debug_buffer( );
5987 #endif
5988 return;
5989 }
5990 }
5991 /* synchronize the termination process */
5992 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5993
5994 /* have we already finished */
5995 if( __kmp_global.g.g_abort ) {
5996 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5997 /* TODO abort? */
5998 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5999 return;
6000 }
6001 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6002 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6003 return;
6004 }
6005
6006 /* We need this lock to enforce mutex between this reading of
6007 __kmp_threads_capacity and the writing by __kmp_register_root.
6008 Alternatively, we can use a counter of roots that is
6009 atomically updated by __kmp_get_global_thread_id_reg,
6010 __kmp_do_serial_initialize and __kmp_internal_end_*.
6011 */
6012 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6013
6014 /* now we can safely conduct the actual termination */
6015 __kmp_internal_end();
6016
6017 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6018 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6019
6020 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
6021
6022 #ifdef DUMP_DEBUG_ON_EXIT
6023 if ( __kmp_debug_buf )
6024 __kmp_dump_debug_buffer();
6025 #endif
6026
6027 #if KMP_OS_WINDOWS
6028 __kmp_close_console();
6029 #endif
6030
6031 __kmp_fini_allocator();
6032
6033} // __kmp_internal_end_library
6034
6035void
6036__kmp_internal_end_thread( int gtid_req )
6037{
6038 int i;
6039
6040 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6041 /* this shouldn't be a race condition because __kmp_internal_end() is the
6042 * only place to clear __kmp_serial_init */
6043 /* we'll check this later too, after we get the lock */
6044 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
6045 // because the next check will work in any case.
6046 if( __kmp_global.g.g_abort ) {
6047 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
6048 /* TODO abort? */
6049 return;
6050 }
6051 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6052 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
6053 return;
6054 }
6055
6056 KMP_MB(); /* Flush all pending memory write invalidates. */
6057
6058 /* find out who we are and what we should do */
6059 {
6060 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6061 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6062 if( gtid == KMP_GTID_SHUTDOWN ) {
6063 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6064 return;
6065 } else if( gtid == KMP_GTID_MONITOR ) {
6066 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6067 return;
6068 } else if( gtid == KMP_GTID_DNE ) {
6069 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6070 return;
6071 /* we don't know who we are */
6072 } else if( KMP_UBER_GTID( gtid )) {
6073 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006074 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006075 __kmp_global.g.g_abort = -1;
6076 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6077 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6078 return;
6079 } else {
6080 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6081 __kmp_unregister_root_current_thread( gtid );
6082 }
6083 } else {
6084 /* just a worker thread, let's leave */
6085 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6086
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006087 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006088 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006089 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006090
6091 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6092 return;
6093 }
6094 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006095 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006096    // AC: let's not shut down the Linux* OS dynamic library at the exit of the uber thread,
6097 // because we will better shutdown later in the library destructor.
6098    // because we will better shutdown later in the library destructor.
6098    // The reason for this change is a performance problem when a non-OpenMP thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006099    // in a loop forks and joins many OpenMP threads. We can save a lot of time
6100 // keeping worker threads alive until the program shutdown.
6101 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6102 // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006103 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006104 return;
6105 #endif
6106 /* synchronize the termination process */
6107 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6108
6109 /* have we already finished */
6110 if( __kmp_global.g.g_abort ) {
6111 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6112 /* TODO abort? */
6113 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6114 return;
6115 }
6116 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6117 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6118 return;
6119 }
6120
6121 /* We need this lock to enforce mutex between this reading of
6122 __kmp_threads_capacity and the writing by __kmp_register_root.
6123 Alternatively, we can use a counter of roots that is
6124 atomically updated by __kmp_get_global_thread_id_reg,
6125 __kmp_do_serial_initialize and __kmp_internal_end_*.
6126 */
6127
6128 /* should we finish the run-time? are all siblings done? */
6129 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6130
6131 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6132 if ( KMP_UBER_GTID( i ) ) {
6133 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6134 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6135 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6136 return;
6137 };
6138 }
6139
6140 /* now we can safely conduct the actual termination */
6141
6142 __kmp_internal_end();
6143
6144 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6145 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6146
Jim Cownie77c2a632014-09-03 11:34:33 +00006147 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006148
6149 #ifdef DUMP_DEBUG_ON_EXIT
6150 if ( __kmp_debug_buf )
6151 __kmp_dump_debug_buffer();
6152 #endif
6153} // __kmp_internal_end_thread
6154
6155// -------------------------------------------------------------------------------------------------
6156// Library registration stuff.
6157
6158static long __kmp_registration_flag = 0;
6159 // Random value used to indicate library initialization.
6160static char * __kmp_registration_str = NULL;
6161 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6162
6163
6164static inline
6165char *
6166__kmp_reg_status_name() {
6167 /*
6168 On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
6169 If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
6170        the name of the registered_lib_env env var cannot be found, because the name will contain a different pid.
6171 */
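    // For example, a process whose pid happens to be 12345 (illustrative value)
    // would use the name "__KMP_REGISTERED_LIB_12345".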
6172 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6173} // __kmp_reg_status_name
6174
6175
6176void
6177__kmp_register_library_startup(
6178 void
6179) {
6180
6181 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6182 int done = 0;
6183 union {
6184 double dtime;
6185 long ltime;
6186 } time;
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006187 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie5e8470a2013-09-27 10:38:44 +00006188 __kmp_initialize_system_tick();
6189 #endif
6190 __kmp_read_system_time( & time.dtime );
6191 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6192 __kmp_registration_str =
6193 __kmp_str_format(
6194 "%p-%lx-%s",
6195 & __kmp_registration_flag,
6196 __kmp_registration_flag,
6197 KMP_LIBRARY_FILE
6198 );
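    // The resulting value has the form "<flag address>-<flag value>-<library file>",
    // e.g. (with made-up numbers) "0x7f1234567890-cafe1234-libomp.so"; the parsing
    // code in the loop below splits it back apart on '-'.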
6199
6200 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6201
6202 while ( ! done ) {
6203
6204 char * value = NULL; // Actual value of the environment variable.
6205
6206        // Set the environment variable, but do not overwrite it if it already exists.
6207 __kmp_env_set( name, __kmp_registration_str, 0 );
6208        // Check that the variable was written.
6209 value = __kmp_env_get( name );
6210 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6211
6212 done = 1; // Ok, environment variable set successfully, exit the loop.
6213
6214 } else {
6215
6216 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6217            // Check whether it is alive or dead.
6218 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6219 char * tail = value;
6220 char * flag_addr_str = NULL;
6221 char * flag_val_str = NULL;
6222 char const * file_name = NULL;
6223 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6224 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6225 file_name = tail;
6226 if ( tail != NULL ) {
6227 long * flag_addr = 0;
6228 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006229 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6230 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006231 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6232 // First, check whether environment-encoded address is mapped into addr space.
6233 // If so, dereference it to see if it still has the right value.
6234
6235 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6236 neighbor = 1;
6237 } else {
6238 // If not, then we know the other copy of the library is no longer running.
6239 neighbor = 2;
6240 }; // if
6241 }; // if
6242 }; // if
6243 switch ( neighbor ) {
6244 case 0 : // Cannot parse environment variable -- neighbor status unknown.
6245                    // Assume it is an incompatible format from a future version of the library.
6246 // Assume the other library is alive.
6247 // WARN( ... ); // TODO: Issue a warning.
6248 file_name = "unknown library";
6249                    // Attention! Falling through to the next case. That's intentional.
6250 case 1 : { // Neighbor is alive.
6251                    // Check whether this is allowed.
6252 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6253 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6254 // That's not allowed. Issue fatal error.
6255 __kmp_msg(
6256 kmp_ms_fatal,
6257 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6258 KMP_HNT( DuplicateLibrary ),
6259 __kmp_msg_null
6260 );
6261 }; // if
6262 KMP_INTERNAL_FREE( duplicate_ok );
6263 __kmp_duplicate_library_ok = 1;
6264 done = 1; // Exit the loop.
6265 } break;
6266 case 2 : { // Neighbor is dead.
6267 // Clear the variable and try to register library again.
6268 __kmp_env_unset( name );
6269 } break;
6270 default : {
6271 KMP_DEBUG_ASSERT( 0 );
6272 } break;
6273 }; // switch
6274
6275 }; // if
6276 KMP_INTERNAL_FREE( (void *) value );
6277
6278 }; // while
6279 KMP_INTERNAL_FREE( (void *) name );
6280
6281} // func __kmp_register_library_startup
6282
6283
6284void
6285__kmp_unregister_library( void ) {
6286
6287 char * name = __kmp_reg_status_name();
6288 char * value = __kmp_env_get( name );
6289
6290 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6291 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6292 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6293 // Ok, this is our variable. Delete it.
6294 __kmp_env_unset( name );
6295 }; // if
6296
6297 KMP_INTERNAL_FREE( __kmp_registration_str );
6298 KMP_INTERNAL_FREE( value );
6299 KMP_INTERNAL_FREE( name );
6300
6301 __kmp_registration_flag = 0;
6302 __kmp_registration_str = NULL;
6303
6304} // __kmp_unregister_library
6305
6306
6307// End of Library registration stuff.
6308// -------------------------------------------------------------------------------------------------
6309
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006310#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6311
6312static void __kmp_check_mic_type()
6313{
6314 kmp_cpuid_t cpuid_state = {0};
6315 kmp_cpuid_t * cs_p = &cpuid_state;
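    // CPUID leaf 1: EAX holds family/model/stepping; the masked values below distinguish
    // KNC (mic2) from KNL (mic3).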
Jonathan Peyton7be075332015-06-22 15:53:50 +00006316 __kmp_x86_cpuid(1, 0, cs_p);
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006317 // We don't support mic1 at the moment
6318 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6319 __kmp_mic_type = mic2;
6320 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6321 __kmp_mic_type = mic3;
6322 } else {
6323 __kmp_mic_type = non_mic;
6324 }
6325}
6326
6327#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
6328
Jim Cownie5e8470a2013-09-27 10:38:44 +00006329static void
6330__kmp_do_serial_initialize( void )
6331{
6332 int i, gtid;
6333 int size;
6334
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006335 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006336
6337 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6338 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6339 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6340 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6341 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6342
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006343#if OMPT_SUPPORT
6344 ompt_pre_init();
6345#endif
6346
Jim Cownie5e8470a2013-09-27 10:38:44 +00006347 __kmp_validate_locks();
6348
6349 /* Initialize internal memory allocator */
6350 __kmp_init_allocator();
6351
6352 /* Register the library startup via an environment variable
6353 and check to see whether another copy of the library is already
6354 registered. */
6355
6356 __kmp_register_library_startup( );
6357
6358 /* TODO reinitialization of library */
6359 if( TCR_4(__kmp_global.g.g_done) ) {
6360 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6361 }
6362
6363 __kmp_global.g.g_abort = 0;
6364 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6365
6366 /* initialize the locks */
6367#if KMP_USE_ADAPTIVE_LOCKS
6368#if KMP_DEBUG_ADAPTIVE_LOCKS
6369 __kmp_init_speculative_stats();
6370#endif
6371#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006372#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00006373 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006374#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006375 __kmp_init_lock( & __kmp_global_lock );
6376 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6377 __kmp_init_lock( & __kmp_debug_lock );
6378 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6379 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6380 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6381 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6382 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6383 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6384 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6385 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6386 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6387 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6388 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6389 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6390 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6391 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6392 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006393#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006394 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006395#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006396 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6397
6398 /* conduct initialization and initial setup of configuration */
6399
6400 __kmp_runtime_initialize();
6401
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006402#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6403 __kmp_check_mic_type();
6404#endif
6405
Jim Cownie5e8470a2013-09-27 10:38:44 +00006406 // Some global variable initialization moved here from kmp_env_initialize()
6407#ifdef KMP_DEBUG
6408 kmp_diag = 0;
6409#endif
6410 __kmp_abort_delay = 0;
6411
6412 // From __kmp_init_dflt_team_nth()
6413 /* assume the entire machine will be used */
6414 __kmp_dflt_team_nth_ub = __kmp_xproc;
6415 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6416 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6417 }
6418 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6419 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6420 }
6421 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006422
6423 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6424 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006425#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006426 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6427 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006428#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006429 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6430 __kmp_library = library_throughput;
6431 // From KMP_SCHEDULE initialization
6432 __kmp_static = kmp_sch_static_balanced;
6433    // AC: do not use analytical here, because it is non-monotonic
6434 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006435    //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006436    // Barrier initialization. Moved here from the barrier branch bit control and barrier method
6437    // control parts of __kmp_env_initialize()
6438 #if KMP_FAST_REDUCTION_BARRIER
6439 #define kmp_reduction_barrier_gather_bb ((int)1)
6440 #define kmp_reduction_barrier_release_bb ((int)1)
6441 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6442 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6443 #endif // KMP_FAST_REDUCTION_BARRIER
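    // Seed every barrier kind with the default gather/release branch bits (which control the barrier
    // tree fan-out) and algorithm; the reduction barrier then gets its own tuned values.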
6444 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6445 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6446 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6447 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6448 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6449 #if KMP_FAST_REDUCTION_BARRIER
6450 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6451 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6452 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6453 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6454 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6455 }
6456 #endif // KMP_FAST_REDUCTION_BARRIER
6457 }
6458 #if KMP_FAST_REDUCTION_BARRIER
6459 #undef kmp_reduction_barrier_release_pat
6460 #undef kmp_reduction_barrier_gather_pat
6461 #undef kmp_reduction_barrier_release_bb
6462 #undef kmp_reduction_barrier_gather_bb
6463 #endif // KMP_FAST_REDUCTION_BARRIER
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006464#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006465 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006466 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006467 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006468 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6469 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6470 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6471 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006472#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006473 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006474 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6475 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6476 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006477#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006478#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006479
6480 // From KMP_CHECKS initialization
6481#ifdef KMP_DEBUG
6482 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6483#else
6484 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6485#endif
6486
6487 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6488 __kmp_foreign_tp = TRUE;
6489
6490 __kmp_global.g.g_dynamic = FALSE;
6491 __kmp_global.g.g_dynamic_mode = dynamic_default;
6492
6493 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006494
Jim Cownie5e8470a2013-09-27 10:38:44 +00006495 // Print all messages in message catalog for testing purposes.
6496 #ifdef KMP_DEBUG
6497 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6498 if ( __kmp_str_match_true( val ) ) {
6499 kmp_str_buf_t buffer;
6500 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006501 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006502 __kmp_printf( "%s", buffer.str );
6503 __kmp_str_buf_free( & buffer );
6504 }; // if
6505 __kmp_env_free( & val );
6506 #endif
6507
Jim Cownie181b4bb2013-12-23 17:28:57 +00006508 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006509 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6510 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6511
Jim Cownie5e8470a2013-09-27 10:38:44 +00006512 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6513 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6514 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6515 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6516 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6517 __kmp_thread_pool = NULL;
6518 __kmp_thread_pool_insert_pt = NULL;
6519 __kmp_team_pool = NULL;
6520
6521 /* Allocate all of the variable sized records */
6522 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6523 /* Since allocation is cache-aligned, just add extra padding at the end */
6524 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6525 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6526 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6527
6528 /* init thread counts */
6529 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
6530 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
6531 __kmp_all_nth = 0;
6532 __kmp_nth = 0;
6533
6534 /* setup the uber master thread and hierarchy */
6535 gtid = __kmp_register_root( TRUE );
6536 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6537 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6538 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6539
6540 KMP_MB(); /* Flush all pending memory write invalidates. */
6541
6542 __kmp_common_initialize();
6543
6544 #if KMP_OS_UNIX
6545 /* invoke the child fork handler */
6546 __kmp_register_atfork();
6547 #endif
6548
Jonathan Peyton99016992015-05-26 17:32:53 +00006549 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006550 {
6551        /* Invoke the exit handler when the program finishes, but only for the static library.
6552           For a dynamic library, we already have _fini and DllMain.
6553 */
6554 int rc = atexit( __kmp_internal_end_atexit );
6555 if ( rc != 0 ) {
6556 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6557 }; // if
6558 }
6559 #endif
6560
6561 #if KMP_HANDLE_SIGNALS
6562 #if KMP_OS_UNIX
6563 /* NOTE: make sure that this is called before the user installs
6564 * their own signal handlers so that the user handlers
6565 * are called first. this way they can return false,
6566 * not call our handler, avoid terminating the library,
6567 * and continue execution where they left off. */
6568 __kmp_install_signals( FALSE );
6569 #endif /* KMP_OS_UNIX */
6570 #if KMP_OS_WINDOWS
6571 __kmp_install_signals( TRUE );
6572 #endif /* KMP_OS_WINDOWS */
6573 #endif
6574
6575 /* we have finished the serial initialization */
6576 __kmp_init_counter ++;
6577
6578 __kmp_init_serial = TRUE;
6579
6580 if (__kmp_settings) {
6581 __kmp_env_print();
6582 }
6583
6584#if OMP_40_ENABLED
6585 if (__kmp_display_env || __kmp_display_env_verbose) {
6586 __kmp_env_print_2();
6587 }
6588#endif // OMP_40_ENABLED
6589
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006590#if OMPT_SUPPORT
6591 ompt_post_init();
6592#endif
6593
Jim Cownie5e8470a2013-09-27 10:38:44 +00006594 KMP_MB();
6595
6596 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6597}
6598
6599void
6600__kmp_serial_initialize( void )
6601{
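    // Double-checked initialization: test the flag, take the bootstrap lock, then re-test under the
    // lock so that only one thread runs __kmp_do_serial_initialize().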
6602 if ( __kmp_init_serial ) {
6603 return;
6604 }
6605 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6606 if ( __kmp_init_serial ) {
6607 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6608 return;
6609 }
6610 __kmp_do_serial_initialize();
6611 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6612}
6613
6614static void
6615__kmp_do_middle_initialize( void )
6616{
6617 int i, j;
6618 int prev_dflt_team_nth;
6619
6620 if( !__kmp_init_serial ) {
6621 __kmp_do_serial_initialize();
6622 }
6623
6624 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6625
6626 //
6627 // Save the previous value for the __kmp_dflt_team_nth so that
6628 // we can avoid some reinitialization if it hasn't changed.
6629 //
6630 prev_dflt_team_nth = __kmp_dflt_team_nth;
6631
Alp Toker98758b02014-03-02 04:12:06 +00006632#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006633 //
6634 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6635 // number of cores on the machine.
6636 //
6637 __kmp_affinity_initialize();
6638
6639 //
6640 // Run through the __kmp_threads array and set the affinity mask
6641 // for each root thread that is currently registered with the RTL.
6642 //
6643 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6644 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6645 __kmp_affinity_set_init_mask( i, TRUE );
6646 }
6647 }
Alp Toker98758b02014-03-02 04:12:06 +00006648#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006649
6650 KMP_ASSERT( __kmp_xproc > 0 );
6651 if ( __kmp_avail_proc == 0 ) {
6652 __kmp_avail_proc = __kmp_xproc;
6653 }
6654
6655 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
6656 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006657 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006658 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6659 j++;
6660 }
6661
6662 if ( __kmp_dflt_team_nth == 0 ) {
6663#ifdef KMP_DFLT_NTH_CORES
6664 //
6665 // Default #threads = #cores
6666 //
6667 __kmp_dflt_team_nth = __kmp_ncores;
6668 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6669 __kmp_dflt_team_nth ) );
6670#else
6671 //
6672 // Default #threads = #available OS procs
6673 //
6674 __kmp_dflt_team_nth = __kmp_avail_proc;
6675 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6676 __kmp_dflt_team_nth ) );
6677#endif /* KMP_DFLT_NTH_CORES */
6678 }
6679
6680 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6681 __kmp_dflt_team_nth = KMP_MIN_NTH;
6682 }
6683 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6684 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6685 }
6686
6687 //
6688 // There's no harm in continuing if the following check fails,
6689 // but it indicates an error in the previous logic.
6690 //
6691 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6692
6693 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6694 //
6695 // Run through the __kmp_threads array and set the num threads icv
6696 // for each root thread that is currently registered with the RTL
6697 // (which has not already explicitly set its nthreads-var with a
6698 // call to omp_set_num_threads()).
6699 //
6700 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6701 kmp_info_t *thread = __kmp_threads[ i ];
6702 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006703 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006704
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006705 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006706 }
6707 }
6708 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6709 __kmp_dflt_team_nth) );
6710
6711#ifdef KMP_ADJUST_BLOCKTIME
6712 /* Adjust blocktime to zero if necessary */
6713 /* now that __kmp_avail_proc is set */
6714 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6715 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6716 if ( __kmp_nth > __kmp_avail_proc ) {
6717 __kmp_zero_bt = TRUE;
6718 }
6719 }
6720#endif /* KMP_ADJUST_BLOCKTIME */
6721
6722 /* we have finished middle initialization */
6723 TCW_SYNC_4(__kmp_init_middle, TRUE);
6724
6725 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6726}
6727
6728void
6729__kmp_middle_initialize( void )
6730{
6731 if ( __kmp_init_middle ) {
6732 return;
6733 }
6734 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6735 if ( __kmp_init_middle ) {
6736 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6737 return;
6738 }
6739 __kmp_do_middle_initialize();
6740 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6741}
6742
6743void
6744__kmp_parallel_initialize( void )
6745{
6746 int gtid = __kmp_entry_gtid(); // this might be a new root
6747
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006748    /* synchronize parallel initialization (for sibling threads) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006749 if( TCR_4(__kmp_init_parallel) ) return;
6750 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6751 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6752
6753 /* TODO reinitialization after we have already shut down */
6754 if( TCR_4(__kmp_global.g.g_done) ) {
6755 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6756 __kmp_infinite_loop();
6757 }
6758
6759 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6760 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6761 */
6762 if( !__kmp_init_middle ) {
6763 __kmp_do_middle_initialize();
6764 }
6765
6766 /* begin initialization */
6767 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6768 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6769
6770#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6771 //
6772 // Save the FP control regs.
6773 // Worker threads will set theirs to these values at thread startup.
6774 //
6775 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6776 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6777 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6778#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6779
6780#if KMP_OS_UNIX
6781# if KMP_HANDLE_SIGNALS
6782 /* must be after __kmp_serial_initialize */
6783 __kmp_install_signals( TRUE );
6784# endif
6785#endif
6786
6787 __kmp_suspend_initialize();
6788
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006789#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006790 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6791 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6792 }
6793#else
6794 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6795 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6796 }
6797#endif
6798
6799 if ( __kmp_version ) {
6800 __kmp_print_version_2();
6801 }
6802
Jim Cownie5e8470a2013-09-27 10:38:44 +00006803 /* we have finished parallel initialization */
6804 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6805
6806 KMP_MB();
6807 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6808
6809 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6810}
6811
6812
6813/* ------------------------------------------------------------------------ */
6814
6815void
6816__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6817 kmp_team_t *team )
6818{
6819 kmp_disp_t *dispatch;
6820
6821 KMP_MB();
6822
6823 /* none of the threads have encountered any constructs, yet. */
6824 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006825#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006826 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006827#endif /* KMP_CACHE_MANAGE */
6828 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6829 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006830 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6831 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006832
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006833 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006834#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00006835 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6836#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006837 if( __kmp_env_consistency_check )
6838 __kmp_push_parallel( gtid, team->t.t_ident );
6839
6840 KMP_MB(); /* Flush all pending memory write invalidates. */
6841}
6842
6843void
6844__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6845 kmp_team_t *team )
6846{
6847 if( __kmp_env_consistency_check )
6848 __kmp_pop_parallel( gtid, team->t.t_ident );
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006849
6850 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006851}
6852
6853int
6854__kmp_invoke_task_func( int gtid )
6855{
6856 int rc;
6857 int tid = __kmp_tid_from_gtid( gtid );
6858 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006859 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006860
6861 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6862#if USE_ITT_BUILD
6863 if ( __itt_stack_caller_create_ptr ) {
6864 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6865 }
6866#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006867#if INCLUDE_SSC_MARKS
6868 SSC_MARK_INVOKING();
6869#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006870
6871#if OMPT_SUPPORT
6872 void *dummy;
6873 void **exit_runtime_p;
6874 ompt_task_id_t my_task_id;
6875 ompt_parallel_id_t my_parallel_id;
6876
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006877 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006878 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6879 ompt_task_info.frame.exit_runtime_frame);
6880 } else {
6881 exit_runtime_p = &dummy;
6882 }
6883
6884#if OMPT_TRACE
6885 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6886 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006887 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006888 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6889 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6890 my_parallel_id, my_task_id);
6891 }
6892#endif
6893#endif
6894
Jonathan Peyton45be4502015-08-11 21:36:41 +00006895 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006896 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6897 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006898 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6899 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006900#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006901 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006902#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006903 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006904#if OMPT_SUPPORT
6905 *exit_runtime_p = NULL;
6906#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006907 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006908
Jim Cownie5e8470a2013-09-27 10:38:44 +00006909#if USE_ITT_BUILD
6910 if ( __itt_stack_caller_create_ptr ) {
6911 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6912 }
6913#endif /* USE_ITT_BUILD */
6914 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6915
6916 return rc;
6917}
6918
6919#if OMP_40_ENABLED
6920void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006921__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006922{
6923    // This routine is called by all master threads in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006924 kmp_info_t *thr = __kmp_threads[ gtid ];
6925 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006926 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006927 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6928 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6929 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006930 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006931 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006932    // Launch the league of teams now, but do not let workers execute
6933    // (they wait on the fork barrier until the next parallel region)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006934#if INCLUDE_SSC_MARKS
6935 SSC_MARK_FORKING();
6936#endif
6937 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006938 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006939#if OMPT_SUPPORT
6940 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6941#endif
6942 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006943 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6944 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006945#if INCLUDE_SSC_MARKS
6946 SSC_MARK_JOINING();
6947#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006948
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006949    // AC: the last parameter "1" eliminates the join barrier, which won't work because
6950    // worker threads are at the fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006951 __kmp_join_call( loc, gtid
6952#if OMPT_SUPPORT
6953 , fork_context_intel
6954#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006955 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006956}
6957
6958int
6959__kmp_invoke_teams_master( int gtid )
6960{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006961 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6962 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006963 #if KMP_DEBUG
6964 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6965 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6966 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006967 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6968 __kmp_teams_master( gtid );
6969 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006970 return 1;
6971}
6972#endif /* OMP_40_ENABLED */
6973
6974/* this sets the requested number of threads for the next parallel region
6975 * encountered by this team */
6976/* since this should be enclosed in the forkjoin critical section it
6977 * should avoid race conditions with assymmetrical nested parallelism */
6978
6979void
6980__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6981{
6982 kmp_info_t *thr = __kmp_threads[gtid];
6983
6984 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006985 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006986}
6987
6988#if OMP_40_ENABLED
6989
6990/* this sets the requested number of teams for the teams region and/or
6991 * the number of threads for the next parallel region encountered */
6992void
6993__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6994{
6995 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006996 KMP_DEBUG_ASSERT(num_teams >= 0);
6997 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006998
6999 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007000 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007001 if( num_teams > __kmp_max_nth ) { // if too many teams requested?
7002 if ( !__kmp_reserve_warn ) {
7003 __kmp_reserve_warn = 1;
7004 __kmp_msg(
7005 kmp_ms_warning,
7006 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
7007 KMP_HNT( Unset_ALL_THREADS ),
7008 __kmp_msg_null
7009 );
7010 }
7011 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007012 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007013 // Set number of teams (number of threads in the outer "parallel" of the teams)
7014 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7015
7016 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007017 if( num_threads == 0 ) {
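        // No thread count given: split the available processors evenly among the teams, then clip so
        // that num_teams * num_threads stays within __kmp_max_nth.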
Jim Cownie5e8470a2013-09-27 10:38:44 +00007018 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007019 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007020 num_threads = __kmp_avail_proc / num_teams;
7021 if( num_teams * num_threads > __kmp_max_nth ) {
7022            // adjust num_threads without a warning since it is not a user setting
7023 num_threads = __kmp_max_nth / num_teams;
7024 }
7025 } else {
7026 if( num_teams * num_threads > __kmp_max_nth ) {
7027 int new_threads = __kmp_max_nth / num_teams;
7028 if ( !__kmp_reserve_warn ) { // user asked for too many threads
7029 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
7030 __kmp_msg(
7031 kmp_ms_warning,
7032 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
7033 KMP_HNT( Unset_ALL_THREADS ),
7034 __kmp_msg_null
7035 );
7036 }
7037 num_threads = new_threads;
7038 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007039 }
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007040 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007041}
7042
7043
7044//
7045// Set the proc_bind var to use in the following parallel region.
7046//
7047void
7048__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
7049{
7050 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007051 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007052}
7053
7054#endif /* OMP_40_ENABLED */
7055
7056/* Launch the worker threads into the microtask. */
7057
7058void
7059__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7060{
7061 kmp_info_t *this_thr = __kmp_threads[gtid];
7062
7063#ifdef KMP_DEBUG
7064 int f;
7065#endif /* KMP_DEBUG */
7066
7067 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007068 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007069 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7070 KMP_MB(); /* Flush all pending memory write invalidates. */
7071
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007072 team->t.t_construct = 0; /* no single directives seen yet */
7073 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007074
7075 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007076 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007077 if ( team->t.t_max_nproc > 1 ) {
7078 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00007079 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007080 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007081#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007082 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7083#endif
7084 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007085 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007086 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007087#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007088 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7089#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007090 }
7091
7092 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007093 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007094
7095#ifdef KMP_DEBUG
7096 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7097 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7098 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7099 }
7100#endif /* KMP_DEBUG */
7101
7102 /* release the worker threads so they may begin working */
7103 __kmp_fork_barrier( gtid, 0 );
7104}
7105
7106
7107void
7108__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7109{
7110 kmp_info_t *this_thr = __kmp_threads[gtid];
7111
7112 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007113 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007114 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7115 KMP_MB(); /* Flush all pending memory write invalidates. */
7116
7117 /* Join barrier after fork */
7118
7119#ifdef KMP_DEBUG
7120 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7121 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7122 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7123 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7124 __kmp_print_structure();
7125 }
7126 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7127 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7128#endif /* KMP_DEBUG */
7129
7130 __kmp_join_barrier( gtid ); /* wait for everyone */
7131
7132 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007133 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007134}
7135
7136
7137/* ------------------------------------------------------------------------ */
7138/* ------------------------------------------------------------------------ */
7139
7140#ifdef USE_LOAD_BALANCE
7141
7142//
7143// Return the number of worker threads actively spinning in the hot team if we
7144// are at the outermost level of parallelism. Otherwise, return 0.
7145//
7146static int
7147__kmp_active_hot_team_nproc( kmp_root_t *root )
7148{
7149 int i;
7150 int retval;
7151 kmp_team_t *hot_team;
7152
7153 if ( root->r.r_active ) {
7154 return 0;
7155 }
7156 hot_team = root->r.r_hot_team;
7157 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7158 return hot_team->t.t_nproc - 1; // Don't count master thread
7159 }
7160
7161 //
7162 // Skip the master thread - it is accounted for elsewhere.
7163 //
7164 retval = 0;
7165 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7166 if ( hot_team->t.t_threads[i]->th.th_active ) {
7167 retval++;
7168 }
7169 }
7170 return retval;
7171}
7172
7173//
7174// Perform an automatic adjustment to the number of
7175// threads used by the next parallel region.
7176//
7177static int
7178__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7179{
7180 int retval;
7181 int pool_active;
7182 int hot_team_active;
7183 int team_curr_active;
7184 int system_active;
7185
7186 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7187 root, set_nproc ) );
7188 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007189 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007190 KMP_DEBUG_ASSERT( set_nproc > 1 );
7191
7192 if ( set_nproc == 1) {
7193 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7194 return 1;
7195 }
7196
7197 //
7198 // Threads that are active in the thread pool, active in the hot team
7199 // for this particular root (if we are at the outer par level), and
7200 // the currently executing thread (to become the master) are available
7201 // to add to the new team, but are currently contributing to the system
7202 // load, and must be accounted for.
7203 //
7204 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7205 hot_team_active = __kmp_active_hot_team_nproc( root );
7206 team_curr_active = pool_active + hot_team_active + 1;
7207
7208 //
7209 // Check the system load.
7210 //
7211 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7212 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7213 system_active, pool_active, hot_team_active ) );
7214
7215 if ( system_active < 0 ) {
7216 //
7217 // There was an error reading the necessary info from /proc,
7218 // so use the thread limit algorithm instead. Once we set
7219 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7220 // we shouldn't wind up getting back here.
7221 //
7222 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7223 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7224
7225 //
7226 // Make this call behave like the thread limit algorithm.
7227 //
7228 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7229 : root->r.r_hot_team->t.t_nproc);
7230 if ( retval > set_nproc ) {
7231 retval = set_nproc;
7232 }
7233 if ( retval < KMP_MIN_NTH ) {
7234 retval = KMP_MIN_NTH;
7235 }
7236
7237 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7238 return retval;
7239 }
7240
7241 //
7242 // There is a slight delay in the load balance algorithm in detecting
7243 // new running procs. The real system load at this instant should be
7244    // at least as large as the number of active OMP threads that are available to
7245 // add to the team.
7246 //
7247 if ( system_active < team_curr_active ) {
7248 system_active = team_curr_active;
7249 }
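    // Headroom estimate: processors not already busy system-wide, plus the threads this team already
    // accounts for (they will be reused rather than added to the load).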
7250 retval = __kmp_avail_proc - system_active + team_curr_active;
7251 if ( retval > set_nproc ) {
7252 retval = set_nproc;
7253 }
7254 if ( retval < KMP_MIN_NTH ) {
7255 retval = KMP_MIN_NTH;
7256 }
7257
7258 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7259 return retval;
7260} // __kmp_load_balance_nproc()
7261
7262#endif /* USE_LOAD_BALANCE */
7263
Jim Cownie5e8470a2013-09-27 10:38:44 +00007264/* ------------------------------------------------------------------------ */
7265/* ------------------------------------------------------------------------ */
7266
7267/* NOTE: this is called with the __kmp_init_lock held */
7268void
7269__kmp_cleanup( void )
7270{
7271 int f;
7272
7273 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7274
7275 if (TCR_4(__kmp_init_parallel)) {
7276#if KMP_HANDLE_SIGNALS
7277 __kmp_remove_signals();
7278#endif
7279 TCW_4(__kmp_init_parallel, FALSE);
7280 }
7281
7282 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007283#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007284 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007285#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007286 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007287 TCW_4(__kmp_init_middle, FALSE);
7288 }
7289
7290 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7291
7292 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007293 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007294 __kmp_init_serial = FALSE;
7295 }
7296
7297 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7298 if ( __kmp_root[ f ] != NULL ) {
7299 __kmp_free( __kmp_root[ f ] );
7300 __kmp_root[ f ] = NULL;
7301 }
7302 }
7303 __kmp_free( __kmp_threads );
7304    // __kmp_threads and __kmp_root were allocated at once, as a single block, so there is no need to
7305    // free __kmp_root separately.
7306 __kmp_threads = NULL;
7307 __kmp_root = NULL;
7308 __kmp_threads_capacity = 0;
7309
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007310#if KMP_USE_DYNAMIC_LOCK
7311 __kmp_cleanup_indirect_user_locks();
7312#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007313 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007314#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007315
Alp Toker98758b02014-03-02 04:12:06 +00007316 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007317 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7318 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007319 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007320
7321 #if KMP_USE_ADAPTIVE_LOCKS
7322 #if KMP_DEBUG_ADAPTIVE_LOCKS
7323 __kmp_print_speculative_stats();
7324 #endif
7325 #endif
7326 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7327 __kmp_nested_nth.nth = NULL;
7328 __kmp_nested_nth.size = 0;
7329 __kmp_nested_nth.used = 0;
Jonathan Peytond0365a22017-01-18 06:40:19 +00007330 KMP_INTERNAL_FREE( __kmp_nested_proc_bind.bind_types );
7331 __kmp_nested_proc_bind.bind_types = NULL;
7332 __kmp_nested_proc_bind.size = 0;
7333 __kmp_nested_proc_bind.used = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007334
7335 __kmp_i18n_catclose();
7336
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007337#if KMP_STATS_ENABLED
Jonathan Peyton5375fe82016-11-14 21:13:44 +00007338 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007339#endif
7340
Jim Cownie5e8470a2013-09-27 10:38:44 +00007341 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7342}
7343
7344/* ------------------------------------------------------------------------ */
7345/* ------------------------------------------------------------------------ */
7346
7347int
7348__kmp_ignore_mppbeg( void )
7349{
7350 char *env;
7351
7352 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7353 if (__kmp_str_match_false( env ))
7354 return FALSE;
7355 }
7356 // By default __kmpc_begin() is no-op.
7357 return TRUE;
7358}
7359
7360int
7361__kmp_ignore_mppend( void )
7362{
7363 char *env;
7364
7365 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7366 if (__kmp_str_match_false( env ))
7367 return FALSE;
7368 }
7369 // By default __kmpc_end() is no-op.
7370 return TRUE;
7371}
7372
7373void
7374__kmp_internal_begin( void )
7375{
7376 int gtid;
7377 kmp_root_t *root;
7378
7379 /* this is a very important step as it will register new sibling threads
7380 * and assign these new uber threads a new gtid */
7381 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007382 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007383 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7384
7385 if( root->r.r_begin ) return;
7386 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7387 if( root->r.r_begin ) {
7388 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7389 return;
7390 }
7391
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007392 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007393
7394 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7395}
7396
7397
7398/* ------------------------------------------------------------------------ */
7399/* ------------------------------------------------------------------------ */
7400
7401void
7402__kmp_user_set_library (enum library_type arg)
7403{
7404 int gtid;
7405 kmp_root_t *root;
7406 kmp_info_t *thread;
7407
7408 /* first, make sure we are initialized so we can get our gtid */
7409
7410 gtid = __kmp_entry_gtid();
7411 thread = __kmp_threads[ gtid ];
7412
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007413 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007414
7415 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7416 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7417 KMP_WARNING( SetLibraryIncorrectCall );
7418 return;
7419 }
7420
7421 switch ( arg ) {
7422 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007423 thread->th.th_set_nproc = 0;
7424 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007425 break;
7426 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007427 thread->th.th_set_nproc = 0;
7428 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007429 break;
7430 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007431 thread->th.th_set_nproc = 0;
7432 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007433 break;
7434 default:
7435 KMP_FATAL( UnknownLibraryType, arg );
7436 }
7437
7438 __kmp_aux_set_library ( arg );
7439}
7440
7441void
7442__kmp_aux_set_stacksize( size_t arg )
7443{
7444 if (! __kmp_init_serial)
7445 __kmp_serial_initialize();
7446
7447#if KMP_OS_DARWIN
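    // Round the requested size up to a 4 KB (0x1000) page boundary, guarding against overflow.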
7448 if (arg & (0x1000 - 1)) {
7449 arg &= ~(0x1000 - 1);
7450 if(arg + 0x1000) /* check for overflow if we round up */
7451 arg += 0x1000;
7452 }
7453#endif
7454 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7455
7456 /* only change the default stacksize before the first parallel region */
7457 if (! TCR_4(__kmp_init_parallel)) {
7458 size_t value = arg; /* argument is in bytes */
7459
7460 if (value < __kmp_sys_min_stksize )
7461 value = __kmp_sys_min_stksize ;
7462 else if (value > KMP_MAX_STKSIZE)
7463 value = KMP_MAX_STKSIZE;
7464
7465 __kmp_stksize = value;
7466
7467 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7468 }
7469
7470 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7471}
7472
7473/* set the behaviour of the runtime library */
7474/* TODO this can cause some odd behaviour with sibling parallelism... */
7475void
7476__kmp_aux_set_library (enum library_type arg)
7477{
7478 __kmp_library = arg;
7479
7480 switch ( __kmp_library ) {
7481 case library_serial :
7482 {
7483 KMP_INFORM( LibraryIsSerial );
7484 (void) __kmp_change_library( TRUE );
7485 }
7486 break;
7487 case library_turnaround :
7488 (void) __kmp_change_library( TRUE );
7489 break;
7490 case library_throughput :
7491 (void) __kmp_change_library( FALSE );
7492 break;
7493 default:
7494 KMP_FATAL( UnknownLibraryType, arg );
7495 }
7496}
7497
7498/* ------------------------------------------------------------------------ */
7499/* ------------------------------------------------------------------------ */
7500
7501void
7502__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7503{
7504 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007505#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007506 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007507#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007508 int bt_set;
7509
7510 __kmp_save_internal_controls( thread );
7511
7512 /* Normalize and set blocktime for the teams */
7513 if (blocktime < KMP_MIN_BLOCKTIME)
7514 blocktime = KMP_MIN_BLOCKTIME;
7515 else if (blocktime > KMP_MAX_BLOCKTIME)
7516 blocktime = KMP_MAX_BLOCKTIME;
7517
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007518 set__blocktime_team( thread->th.th_team, tid, blocktime );
7519 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007520
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007521#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007522 /* Calculate and set blocktime intervals for the teams */
7523 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7524
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007525 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7526 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007527#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007528
7529 /* Set whether blocktime has been set to "TRUE" */
7530 bt_set = TRUE;
7531
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007532 set__bt_set_team( thread->th.th_team, tid, bt_set );
7533 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007534#if KMP_USE_MONITOR
Samuel Antao33515192016-10-20 13:20:17 +00007535 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
7536 "bt_intervals=%d, monitor_updates=%d\n",
7537 __kmp_gtid_from_tid(tid, thread->th.th_team),
7538 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7539 __kmp_monitor_wakeups));
7540#else
7541 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7542 __kmp_gtid_from_tid(tid, thread->th.th_team),
7543 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007544#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007545}
7546
7547void
7548__kmp_aux_set_defaults(
7549 char const * str,
7550 int len
7551) {
7552 if ( ! __kmp_init_serial ) {
7553 __kmp_serial_initialize();
7554 };
7555 __kmp_env_initialize( str );
7556
7557 if (__kmp_settings
7558#if OMP_40_ENABLED
7559 || __kmp_display_env || __kmp_display_env_verbose
7560#endif // OMP_40_ENABLED
7561 ) {
7562 __kmp_env_print();
7563 }
7564} // __kmp_aux_set_defaults
7565
7566/* ------------------------------------------------------------------------ */
7567
7568/*
7569 * internal fast reduction routines
7570 */
7571
Jim Cownie5e8470a2013-09-27 10:38:44 +00007572PACKED_REDUCTION_METHOD_T
7573__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7574 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7575 kmp_critical_name *lck )
7576{
7577
7578 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7579 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7580 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
7581    // Finally, it's up to the OpenMP RTL to decide which method to select among those generated by PAROPT.
7582
7583 PACKED_REDUCTION_METHOD_T retval;
7584
7585 int team_size;
7586
7587 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7588 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7589
7590 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7591 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7592
7593 retval = critical_reduce_block;
7594
7595    team_size = __kmp_get_team_num_threads( global_tid ); // another way of getting the team size ( with 1 dynamic dereference ) is slower
7596
7597 if( team_size == 1 ) {
7598
7599 retval = empty_reduce_block;
7600
7601 } else {
7602
7603 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7604 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7605
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007606 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007607
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007608 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007609
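            // The tuning below prefers atomics for small teams; beyond the cutoff (raised on MIC)
            // it switches to the tree reduction with the reduction barrier.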
7610 int teamsize_cutoff = 4;
7611
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007612#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7613 if( __kmp_mic_type != non_mic ) {
7614 teamsize_cutoff = 8;
7615 }
7616#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007617 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007618 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007619 if ( atomic_available ) {
7620 retval = atomic_reduce_block;
7621 }
7622 } else {
7623 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7624 }
7625 } else if ( atomic_available ) {
7626 retval = atomic_reduce_block;
7627 }
7628 #else
7629 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007630 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007631
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +00007632 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007633
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007634 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007635
Jim Cownie5e8470a2013-09-27 10:38:44 +00007636 // basic tuning
7637
7638 if( atomic_available ) {
7639 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7640 retval = atomic_reduce_block;
7641 }
7642 } // otherwise: use critical section
7643
7644 #elif KMP_OS_DARWIN
7645
Jim Cownie5e8470a2013-09-27 10:38:44 +00007646 if( atomic_available && ( num_vars <= 3 ) ) {
7647 retval = atomic_reduce_block;
7648 } else if( tree_available ) {
7649 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7650 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7651 }
7652 } // otherwise: use critical section
7653
7654 #else
7655 #error "Unknown or unsupported OS"
7656 #endif
7657
7658 #else
7659 #error "Unknown or unsupported architecture"
7660 #endif
7661
7662 }
7663
Jim Cownie5e8470a2013-09-27 10:38:44 +00007664 // KMP_FORCE_REDUCTION
7665
Andrey Churbanovec23a952015-08-17 10:12:12 +00007666 // If the team is serialized (team_size == 1), ignore the forced reduction
7667 // method and stay with the unsynchronized method (empty_reduce_block)
7668 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007669
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007670 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007671
7672 int atomic_available, tree_available;
7673
7674 switch( ( forced_retval = __kmp_force_reduction_method ) )
7675 {
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007676 case critical_reduce_block:
Jim Cownie5e8470a2013-09-27 10:38:44 +00007677 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007678 break;
7679
7680 case atomic_reduce_block:
7681 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007682 if( ! atomic_available ) {
7683 KMP_WARNING(RedMethodNotSupported, "atomic");
7684 forced_retval = critical_reduce_block;
7685 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007686 break;
7687
7688 case tree_reduce_block:
7689 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007690 if( ! tree_available ) {
7691 KMP_WARNING(RedMethodNotSupported, "tree");
7692 forced_retval = critical_reduce_block;
7693 } else {
7694 #if KMP_FAST_REDUCTION_BARRIER
7695 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7696 #endif
7697 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007698 break;
7699
7700 default:
7701 KMP_ASSERT( 0 ); // "unsupported method specified"
7702 }
7703
7704 retval = forced_retval;
7705 }
7706
7707 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7708
7709 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7710 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7711
7712 return ( retval );
7713}
7714
7715// this function is for testing set/get/determine reduce method
7716kmp_int32
7717__kmp_get_reduce_method( void ) {
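    // packed_reduction_method keeps the barrier variant in the low byte and the method id in the
    // upper bits (see the packing notes in kmp.h), so shifting right by 8 yields the bare method.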
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007718 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007719}
7720
7721/* ------------------------------------------------------------------------ */