/*
 * kmp_runtime.c -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif


#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread. */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */
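/* Lookup strategy (summary of the code below): with __kmp_gtid_mode >= 3 the
   gtid is read from thread-local data (KMP_TDATA_GTID), with mode >= 2 from
   keyed TLS, and otherwise it is recovered by scanning __kmp_threads[] for the
   thread whose recorded stack window [ds_stackbase - ds_stacksize, ds_stackbase]
   contains the address of a local variable of this function. */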

int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t **other_threads;
    size_t stack_data;
    char *stack_addr;
    size_t stack_size;
    char *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
       parallel region, made it return KMP_GTID_DNE to force serial_initialize by
       caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
       __kmp_init_gtid for this to work. */

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr = (char*) & stack_data;
    other_threads = __kmp_threads;

    /*
       ATT: The code below is a source of potential bugs due to unsynchronized access to
       __kmp_threads array. For example:
       1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
       2. Current thread is suspended by OS.
       3. Another thread unregisters and finishes (debug versions of free() may fill memory
          with something like 0xEF).
       4. Current thread is resumed.
       5. Current thread reads junk from *thr.
       TODO: Fix it.
       --ln
    */

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated */
                /* stack size is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

    /* if we haven't been assigned a gtid, then return the code */
    if( i < 0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}

207int
208__kmp_get_global_thread_id_reg( )
209{
210 int gtid;
211
212 if ( !__kmp_init_serial ) {
213 gtid = KMP_GTID_DNE;
214 } else
215#ifdef KMP_TDATA_GTID
216 if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
217 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
218 gtid = __kmp_gtid;
219 } else
220#endif
221 if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
222 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
223 gtid = __kmp_gtid_get_specific();
224 } else {
225 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
226 gtid = __kmp_get_global_thread_id();
227 }
228
229 /* we must be a new uber master sibling thread */
230 if( gtid == KMP_GTID_DNE ) {
231 KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
232 "Registering a new gtid.\n" ));
233 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
234 if( !__kmp_init_serial ) {
235 __kmp_do_serial_initialize();
236 gtid = __kmp_gtid_get_specific();
237 } else {
238 gtid = __kmp_register_root(FALSE);
239 }
240 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
241 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
242 }
243
244 KMP_DEBUG_ASSERT( gtid >=0 );
245
246 return gtid;
247}
248
249/* caller must hold forkjoin_lock */
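/* Note: stacks grow down, so a thread's stack occupies
   [ds_stackbase - ds_stacksize, ds_stackbase). The extensive check below flags
   an overlap if either end of this thread's stack lies strictly inside another
   registered thread's recorded window; uber (root) threads are skipped because
   their window is refined dynamically and cannot overlap. */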
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                                                      (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                                                      "th_%d stack (overlapped)",
                                                      __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}
311
312/* ------------------------------------------------------------------------ */
313
Jim Cownie5e8470a2013-09-27 10:38:44 +0000314/* ------------------------------------------------------------------------ */
315
316void
317__kmp_infinite_loop( void )
318{
319 static int done = FALSE;
320
321 while (! done) {
322 KMP_YIELD( 1 );
323 }
324}
325
326#define MAX_MESSAGE 512
327
328void
329__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
330 char buffer[MAX_MESSAGE];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000331 va_list ap;
332
333 va_start( ap, format);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000334 KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000335 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
336 __kmp_vprintf( kmp_err, buffer, ap );
337#if KMP_PRINT_DATA_PLACEMENT
Jonathan Peyton91b78702015-06-08 19:39:07 +0000338 int node;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000339 if(gtid >= 0) {
340 if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
341 if( __kmp_storage_map_verbose ) {
342 node = __kmp_get_host_node(p1);
343 if(node < 0) /* doesn't work, so don't try this next time */
344 __kmp_storage_map_verbose = FALSE;
345 else {
346 char *last;
347 int lastNode;
348 int localProc = __kmp_get_cpu_from_gtid(gtid);
349
350 p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
351 p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
352 if(localProc >= 0)
353 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
354 else
355 __kmp_printf_no_lock(" GTID %d\n", gtid);
356# if KMP_USE_PRCTL
357/* The more elaborate format is disabled for now because of the prctl hanging bug. */
358 do {
359 last = p1;
360 lastNode = node;
361 /* This loop collates adjacent pages with the same host node. */
362 do {
363 (char*)p1 += PAGE_SIZE;
364 } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
365 __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
366 (char*)p1 - 1, lastNode);
367 } while(p1 <= p2);
368# else
369 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
370 (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
371 if(p1 < p2) {
372 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
373 (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
374 }
375# endif
376 }
377 }
378 } else
379 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
380 }
381#endif /* KMP_PRINT_DATA_PLACEMENT */
382 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
383}
384
void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}

void
__kmp_abort_process()
{

    // Later threads may stall here, but that's ok because abort() will kill them.
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }; // if

    if ( KMP_OS_WINDOWS ) {
        // Let other threads know of abnormal termination and prevent deadlock
        // if abort happened during library initialization or shutdown
        __kmp_global.g.g_abort = SIGABRT;

        /*
            On Windows* OS, by default abort() causes a pop-up error box, which stalls nightly testing.
            Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
            works well, but this function is not available in VS7 (this is not a problem for the DLL, but
            it is a problem for the static OpenMP RTL). SetErrorMode (and so, the timelimit utility) does
            not help, at least in some versions of MS C RTL.

            It seems the following sequence is the only way to simulate abort() and avoid the pop-up error
            box.
        */
        raise( SIGABRT );
        _exit( 3 ); // Just in case, if signal ignored, exit anyway.
    } else {
        abort();
    }; // if

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

} // __kmp_abort_process

void
__kmp_abort_thread( void )
{
    // TODO: Eliminate g_abort global variable and this function.
    // In case of abort just call abort(), it will kill all the threads.
    __kmp_infinite_loop();
} // __kmp_abort_thread

/* ------------------------------------------------------------------------ */

/*
 * Print out the storage map for the major kmp_info_t thread data structures
 * that are allocated together.
 */

static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
    #endif // KMP_FAST_REDUCTION_BARRIER
}

/*
 * Print out the storage map for the major kmp_team_t team data structures
 * that are allocated together.
 */

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

    #if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
    #endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    // TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // The OS terminates other threads (except the one calling ProcessExit or FreeLibrary),
    // so it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
    // However, some threads can still be alive here, although they are about to be terminated.
    // The threads in the array with ds_thread==0 are the most suspicious.
    // Actually, it may not be safe to access the __kmp_threads[].

    // TODO: does it make sense to check __kmp_roots[] ?

    // Let's check that there are no other alive threads registered with the OMP lib.
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Assume that I'm alone.

    // Now it is probably safe to check and reset the locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
    #ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
    #endif // KMP_DEBUG
}

BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
    //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));

            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
                           __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
            {
                // lpReserved is used for telling the difference:
                //   lpReserved == NULL when FreeLibrary() was called,
                //   lpReserved != NULL when the process terminates.
                // When FreeLibrary() is called, worker threads remain alive,
                // so they will release the forkjoin lock by themselves.
                // When the process terminates, worker threads disappear, triggering
                // the problem of an unreleased forkjoin lock as described below.

                // A worker thread can take the forkjoin lock.
                // The problem comes up if that worker thread becomes dead
                // before it releases the forkjoin lock.
                // The forkjoin lock remains taken, while the thread
                // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
                // will try to take the forkjoin lock and will always fail,
                // so that the application will never finish [normally].
                // This scenario is possible if __kmpc_end() has not been executed.
                // It does not look like a corner case; common causes are:
                // - the main function was compiled by an alternative compiler;
                // - the main function was compiled by icl but without /Qopenmp (application with plugins);
                // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP;
                // - an alive foreign thread prevented __kmpc_end from doing cleanup.

                // This is a hack to work around the problem.
                // TODO: !!! figure out something better.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));

            /* if we wanted to register new siblings all the time here call
             * __kmp_get_gtid(); */
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
                           __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */


/* ------------------------------------------------------------------------ */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count)

    if (status) {
        __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
    }
    else {
        __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
    }

    return old_status; // return previous setting of whether KMP_LIBRARY=throughput
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* __kmp_parallel_deo --
 * Wait until it's our turn.
 */
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_MB();
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
        KMP_MB();
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo --
 * Signal the next task.
 */

void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        KMP_MB(); /* Flush all pending memory write invalidates. */

        /* use the tid of the next thread in this team */
        /* TODO: replace with general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB(); /* Flush all pending memory write invalidates. */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* The BARRIER for a SINGLE process section is always explicit */

int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the
           single block
        */
        /* TODO: Should this be acquire or release? */
        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);
        }
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        { // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}


/*
 * determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller.
 */
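/* The reservation is computed in three steps (see below): (1) if dyn-var is set,
   shrink the request according to the active dynamic_mode (load balance, thread
   limit, or random); (2) clip the result against __kmp_max_nth
   (KMP_ALL_THREADS / KMP_MAX_THREADS / OMP_THREAD_LIMIT), warning once if dyn-var
   is false; (3) clip it against the capacity of the __kmp_threads[] array,
   expanding the array when possible. A result of 1 tells the caller to serialize. */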
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads
#if OMP_40_ENABLED
                     , int enter_teams
#endif /* OMP_40_ENABLED */
)
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    //
    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    //
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                       : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));
        }
        else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                                master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                                master_tid, new_nthreads ));
            }
        }
    }
    else {
        KMP_ASSERT( 0 );
    }

    //
    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    //
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
                          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        //
        // If dyn-var is false, emit a 1-time warning.
        //
        if ( ! get__dynamic_2( parent_team, master_tid )
             && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
                        master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    //
    // Check if the threads array is large enough, or needs expanding.
    //
    // See comment in __kmp_register_root() about the adjustment if
    // __kmp_threads[0] == NULL.
    //
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > capacity ) {
        //
        // Expand the threads array.
        //
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
                            root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            //
            // The threads array was not expanded enough.
            //
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            //
            // If dyn-var is false, emit a 1-time warning.
            //
            if ( ! get__dynamic_2( parent_team, master_tid )
                 && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
                else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within critical section forkjoin */

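/* With KMP_NESTED_HOT_TEAMS, a team that is recorded in the master's hot-teams
   array for the current nesting level is reused as-is; otherwise the master is
   installed in slot 0, workers are taken from the pool (or forked) via
   __kmp_allocate_thread, and their per-thread barrier arrived-counters are
   aligned with the team's, so the upcoming fork barrier sees a consistent state. */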
static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int i;
    int use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
    KMP_MB();

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid = 0;
    master_th->th.th_team = team;
    master_th->th.th_team_nproc = team->t.t_nproc;
    master_th->th.th_team_master = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1; // index in array of hot teams
        if( master_th->th.th_teams_microtask ) { // are we inside the teams?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level; // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level; // level was not increased in teams construct for team_of_workers before the parallel
            } // team->t.t_level will be increased inside parallel
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1; // the team is ready to use
            } else {
                use_hot_team = 0; // AC: threads are not allocated yet
                hot_teams[level].hot_team = team; // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        } else {
            use_hot_team = 0;
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                           __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                           __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                           team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                           team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level = master_th->th.th_teams_level;
            thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
            { // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }; // for b
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif

    }

    KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
//
// Propagate any changes to the floating point control registers out to the team.
// We try to avoid unnecessary writes to the relevant cache line in the team structure,
// so we don't make changes unless they are needed.
//
inline static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        // There is no point looking at t_fp_control_saved here.
        // If it is TRUE, we still have to update the values if they are different from those we now have.
        // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
        // that the values in the team are the same as those we have.
        // So, this code achieves what we need whether or not t_fp_control_saved is true.
        // By checking whether the value needs updating we avoid unnecessary writes that would put the
        // cache-line into a written state, causing all threads in the team to have to read it again.
        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
        // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
        // So we must ensure it is correct.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    }
    else {
        // Similarly here. Don't write to this cache-line in the team structure unless we have to.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
    }
}

// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed in the team
        // during the parallel region that we are exiting.
        //
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration

/*
 * Run a parallel region that has been serialized, so runs only in a team of the single master thread.
 */
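/* The first serialization on a thread installs (or allocates) th_serial_team,
   copies the current task's ICVs, and pushes one dispatch buffer; each further
   nested serialization only bumps t_serialized / t_level and pushes another
   dispatch buffer, so the matching end-of-serialized-parallel path can unwind
   one level at a time. */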
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    /* Skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        //
        // No proc_bind clause was specified, so use the current value
        // of proc-bind-var for this parallel region.
        //
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    //
    // Reset for next parallel region
    //
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used
             * TODO: increase performance by making these locks more specific */
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                           ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                           proc_bind,
#endif
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );


            /* TODO: the above breaks the requirement that if we run out of
             * resources, then we can still guarantee that serialized teams
             * are ok, since we may need to allocate a new one */
        } else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
           each serialized task represented by team->t.t_serialized? */
        copy_icvs(
            & this_thr->th.th_current_task->td_icvs,
            & this_thr->th.th_current_task->td_parent->td_icvs );

        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }

#if OMP_40_ENABLED
        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];
        }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
        serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
#endif
        this_thr->th.th_info.ds.ds_tid = 0;

        /* set thread cache values */
        this_thr->th.th_team_nproc = 1;
        this_thr->th.th_team_master = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl (serial_team);

        /* check if we need to allocate dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

        KMP_MB();

    } else {
        /* this serialized team is already being used,
         * that's fine, just add another nested level */
        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;
        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        /* allocate/push dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        {
            dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
            disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
            serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        KMP_MB();
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

}

/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
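/* Overview of the portion below: gather the master thread's state (team, tid,
   root, requested thread count), lazily allocate the nested hot-teams array,
   issue the OMPT parallel_begin callback when enabled, and then decide how the
   region is launched, starting with the special case of a parallel region
   nested directly inside a teams construct. */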
1377int
1378__kmp_fork_call(
1379 ident_t * loc,
1380 int gtid,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001381 enum fork_context_e call_context, // Intel, GNU, ...
Jim Cownie5e8470a2013-09-27 10:38:44 +00001382 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001383#if OMPT_SUPPORT
1384 void *unwrapped_task,
1385#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001386 microtask_t microtask,
1387 launch_t invoker,
1388/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001389#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001390 va_list * ap
1391#else
1392 va_list ap
1393#endif
1394 )
1395{
1396 void **argv;
1397 int i;
1398 int master_tid;
1399 int master_this_cons;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001400 kmp_team_t *team;
1401 kmp_team_t *parent_team;
1402 kmp_info_t *master_th;
1403 kmp_root_t *root;
1404 int nthreads;
1405 int master_active;
1406 int master_set_numthreads;
1407 int level;
1408#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001409 int active_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001410 int teams_level;
1411#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001412#if KMP_NESTED_HOT_TEAMS
1413 kmp_hot_team_ptr_t **p_hot_teams;
1414#endif
1415 { // KMP_TIME_BLOCK
Jonathan Peyton45be4502015-08-11 21:36:41 +00001416 KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
1417 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001418
1419 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001420 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
1421 /* Some systems prefer the stack for the root thread(s) to start with */
1422 /* some gap from the parent stack to prevent false sharing. */
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001423 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001424 /* These 2 lines below are so this does not get optimized out */
1425 if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
1426 __kmp_stkpadding += (short)((kmp_int64)dummy);
1427 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001428
1429 /* initialize if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001430 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
Jim Cownie5e8470a2013-09-27 10:38:44 +00001431 if( ! TCR_4(__kmp_init_parallel) )
1432 __kmp_parallel_initialize();
1433
1434 /* setup current data */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001435 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
1436 parent_team = master_th->th.th_team;
1437 master_tid = master_th->th.th_info.ds.ds_tid;
1438 master_this_cons = master_th->th.th_local.this_construct;
1439 root = master_th->th.th_root;
1440 master_active = root->r.r_active;
1441 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001442
1443#if OMPT_SUPPORT
1444 ompt_parallel_id_t ompt_parallel_id;
1445 ompt_task_id_t ompt_task_id;
1446 ompt_frame_t *ompt_frame;
1447 ompt_task_id_t my_task_id;
1448 ompt_parallel_id_t my_parallel_id;
1449
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001450 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001451 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1452 ompt_task_id = __ompt_get_task_id_internal(0);
1453 ompt_frame = __ompt_get_task_frame_internal(0);
1454 }
1455#endif
1456
Jim Cownie5e8470a2013-09-27 10:38:44 +00001457 // Nested level will be an index in the nested nthreads array
1458 level = parent_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001459 active_level = parent_team->t.t_active_level; // used to launch non-serialized teams even if nesting is not allowed
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001460#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001461 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
1462#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001463#if KMP_NESTED_HOT_TEAMS
1464 p_hot_teams = &master_th->th.th_hot_teams;
1465 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
1466 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
1467 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1468 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1469 (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
1470 }
1471#endif
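    // Note: the hot-teams array is allocated lazily, once per root thread, and
    // sized by __kmp_hot_teams_max_level, so nested teams up to that level can
    // be kept "hot" and reused instead of being re-created on every fork.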
Jim Cownie5e8470a2013-09-27 10:38:44 +00001472
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001473#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001474 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001475 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
1476 int team_size = master_set_numthreads;
1477
1478 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1479 ompt_task_id, ompt_frame, ompt_parallel_id,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001480 team_size, unwrapped_task, OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001481 }
1482#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001483
Jim Cownie5e8470a2013-09-27 10:38:44 +00001484 master_th->th.th_ident = loc;
1485
1486#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001487 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00001488 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
1489 // AC: This is start of parallel that is nested inside teams construct.
1490 // The team is actual (hot), all workers are ready at the fork barrier.
1491 // No lock needed to initialize the team a bit, then free workers.
1492 parent_team->t.t_ident = loc;
Jonathan Peyton7cf08d42016-06-16 18:47:38 +00001493 __kmp_alloc_argv_entries( argc, parent_team, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001494 parent_team->t.t_argc = argc;
1495 argv = (void**)parent_team->t.t_argv;
1496 for( i=argc-1; i >= 0; --i )
1497/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001498#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001499 *argv++ = va_arg( *ap, void * );
1500#else
1501 *argv++ = va_arg( ap, void * );
1502#endif
1503 /* Increment our nested depth levels, but do not increase the serialization count */
1504 if ( parent_team == master_th->th.th_serial_team ) {
1505 // AC: we are in serialized parallel
1506 __kmpc_serialized_parallel(loc, gtid);
1507 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
1508 parent_team->t.t_serialized--; // AC: need this in order enquiry functions
1509 // work correctly, will restore at join time
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001510
1511#if OMPT_SUPPORT
1512 void *dummy;
1513 void **exit_runtime_p;
1514
1515 ompt_lw_taskteam_t lw_taskteam;
1516
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001517 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001518 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1519 unwrapped_task, ompt_parallel_id);
1520 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1521 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1522
1523 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1524
1525#if OMPT_TRACE
1526 /* OMPT implicit task begin */
1527 my_task_id = lw_taskteam.ompt_task_info.task_id;
1528 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001529 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001530 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1531 my_parallel_id, my_task_id);
1532 }
1533#endif
1534
1535 /* OMPT state */
1536 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1537 } else {
1538 exit_runtime_p = &dummy;
1539 }
1540#endif
1541
Jonathan Peyton45be4502015-08-11 21:36:41 +00001542 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001543 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1544 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001545 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001546#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00001547 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001548#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00001549 );
1550 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001551
1552#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001553 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001554 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001555#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001556 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001557
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001558 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001559 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1560 ompt_parallel_id, ompt_task_id);
1561 }
1562
1563 __ompt_lw_taskteam_unlink(master_th);
1564 // clear the task id only after unlinking the task
1565 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1566#endif
1567
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001568 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001569 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001570 ompt_parallel_id, ompt_task_id,
1571 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001572 }
1573 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1574 }
1575#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001576 return TRUE;
1577 }
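        // Note: the code below handles the non-serialized case of a parallel
        // nested directly inside teams: the parent (teams) team is reused as is,
        // its levels are bumped, nproc may only shrink, and __kmp_internal_fork
        // releases the workers already waiting at the fork barrier.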
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001578
Jim Cownie5e8470a2013-09-27 10:38:44 +00001579 parent_team->t.t_pkfn = microtask;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001580#if OMPT_SUPPORT
1581 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1582#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001583 parent_team->t.t_invoke = invoker;
1584 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1585 parent_team->t.t_active_level ++;
1586 parent_team->t.t_level ++;
1587
1588 /* Change number of threads in the team if requested */
1589 if ( master_set_numthreads ) { // The parallel has num_threads clause
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001590 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001591 // AC: only can reduce the number of threads dynamically, cannot increase
1592 kmp_info_t **other_threads = parent_team->t.t_threads;
1593 parent_team->t.t_nproc = master_set_numthreads;
1594 for ( i = 0; i < master_set_numthreads; ++i ) {
1595 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1596 }
1597 // Keep extra threads hot in the team for possible next parallels
1598 }
1599 master_th->th.th_set_nproc = 0;
1600 }
1601
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001602#if USE_DEBUGGER
1603 if ( __kmp_debugging ) { // Let debugger override number of threads.
1604 int nth = __kmp_omp_num_threads( loc );
1605 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
1606 master_set_numthreads = nth;
1607 }; // if
1608 }; // if
1609#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001610
1611 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1612 __kmp_internal_fork( loc, gtid, parent_team );
1613 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
1614
1615 /* Invoke microtask for MASTER thread */
1616 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
1617 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1618
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001619 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001620 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1621 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001622 if (! parent_team->t.t_invoke( gtid )) {
1623 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
1624 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001625 }
1626 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
1627 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
1628 KMP_MB(); /* Flush all pending memory write invalidates. */
1629
1630 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
1631
1632 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001633 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001634#endif /* OMP_40_ENABLED */
1635
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001636#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00001637 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001638 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001640#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001641
Jim Cownie5e8470a2013-09-27 10:38:44 +00001642 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
1643 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001644 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001645#if OMP_40_ENABLED
1646 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
1647#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001648 nthreads = master_set_numthreads ?
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001649 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001650
1651 // Check whether we need to take the fork/join lock (no need for a serialized parallel outside of a teams construct).
1652 // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
1653 if (nthreads > 1) {
1654 if ( ( !get__nested(master_th) && (root->r.r_in_parallel
1655#if OMP_40_ENABLED
1656 && !enter_teams
1657#endif /* OMP_40_ENABLED */
1658 ) ) || ( __kmp_library == library_serial ) ) {
1659 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
1660 gtid, nthreads ));
1661 nthreads = 1;
1662 }
1663 }
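        // Note: this is the point where disabled nesting (or library=serial)
        // collapses an inner parallel to one thread; for example, a second
        // "#pragma omp parallel" encountered while already inside an active
        // parallel region takes the nthreads = 1 path here unless nesting is on.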
1664 if ( nthreads > 1 ) {
1665 /* determine how many new threads we can use */
1666 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
1667
1668 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001669#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001670/* AC: If we execute teams from a parallel region (on the host), then the teams should be created,
1671 but each can have only 1 thread if nesting is disabled. If teams is called from a serial region,
1672 then the teams and their threads should be created regardless of the nesting setting. */
Andrey Churbanov92effc42015-08-18 10:08:27 +00001673 , enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001674#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001675 );
Andrey Churbanov92effc42015-08-18 10:08:27 +00001676 if ( nthreads == 1 ) {
1677 // Free lock for single thread execution here;
1678 // for multi-thread execution it will be freed later
1679 // after team of threads created and initialized
1680 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
1681 }
1682 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001683 }
1684 KMP_DEBUG_ASSERT( nthreads > 0 );
1685
1686 /* If we temporarily changed the set number of threads then restore it now */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001687 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001688
Jim Cownie5e8470a2013-09-27 10:38:44 +00001689 /* create a serialized parallel region? */
1690 if ( nthreads == 1 ) {
1691 /* josh todo: hypothetical question: what do we do for OS X*? */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001692#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001693 void * args[ argc ];
1694#else
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001695 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001696#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001697
Jim Cownie5e8470a2013-09-27 10:38:44 +00001698 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1699
1700 __kmpc_serialized_parallel(loc, gtid);
1701
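            // Note: in this serialized case no team of workers is forked; the
            // master invokes the microtask inline below, and the state pushed by
            // __kmpc_serialized_parallel is undone later through
            // __kmpc_end_serialized_parallel (see __kmp_join_call).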
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001702 if ( call_context == fork_context_intel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001703 /* TODO this sucks, use the compiler itself to pass args! :) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001704 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001705#if OMP_40_ENABLED
1706 if ( !ap ) {
1707 // revert change made in __kmpc_serialized_parallel()
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001708 master_th->th.th_serial_team->t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709 // Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001710
1711#if OMPT_SUPPORT
1712 void *dummy;
1713 void **exit_runtime_p;
1714
1715 ompt_lw_taskteam_t lw_taskteam;
1716
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001717 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001718 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1719 unwrapped_task, ompt_parallel_id);
1720 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1721 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1722
1723 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1724
1725#if OMPT_TRACE
1726 my_task_id = lw_taskteam.ompt_task_info.task_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001727 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001728 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1729 ompt_parallel_id, my_task_id);
1730 }
1731#endif
1732
1733 /* OMPT state */
1734 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1735 } else {
1736 exit_runtime_p = &dummy;
1737 }
1738#endif
1739
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001740 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001741 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1742 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001743 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1744#if OMPT_SUPPORT
1745 , exit_runtime_p
1746#endif
1747 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001748 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001749
1750#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001751 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001752 if (ompt_enabled) {
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001753 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001754
1755#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001756 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001757 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1758 ompt_parallel_id, ompt_task_id);
1759 }
1760#endif
1761
1762 __ompt_lw_taskteam_unlink(master_th);
1763 // clear the task id only after unlinking the task
1764 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1765
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001766 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001767 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001768 ompt_parallel_id, ompt_task_id,
1769 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001770 }
1771 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1772 }
1773#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001774 } else if ( microtask == (microtask_t)__kmp_teams_master ) {
1775 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
1776 team = master_th->th.th_team;
1777 //team->t.t_pkfn = microtask;
1778 team->t.t_invoke = invoker;
1779 __kmp_alloc_argv_entries( argc, team, TRUE );
1780 team->t.t_argc = argc;
1781 argv = (void**) team->t.t_argv;
1782 if ( ap ) {
1783 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001784// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001785# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001786 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001787# else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001788 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001789# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001790 } else {
1791 for( i=0; i < argc; ++i )
1792 // Get args from parent team for teams construct
1793 argv[i] = parent_team->t.t_argv[i];
1794 }
1795 // AC: revert change made in __kmpc_serialized_parallel()
1796 // because initial code in teams should have level=0
1797 team->t.t_level--;
1798 // AC: call special invoker for outer "parallel" of the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001799 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001800 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1801 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001802 invoker(gtid);
1803 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001804 } else {
1805#endif /* OMP_40_ENABLED */
1806 argv = args;
1807 for( i=argc-1; i >= 0; --i )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001808// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001809#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810 *argv++ = va_arg( *ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001811#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001812 *argv++ = va_arg( ap, void * );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001813#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001814 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001815
1816#if OMPT_SUPPORT
1817 void *dummy;
1818 void **exit_runtime_p;
1819
1820 ompt_lw_taskteam_t lw_taskteam;
1821
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001822 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001823 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1824 unwrapped_task, ompt_parallel_id);
1825 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1826 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1827
1828 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1829
1830#if OMPT_TRACE
1831 /* OMPT implicit task begin */
1832 my_task_id = lw_taskteam.ompt_task_info.task_id;
1833 my_parallel_id = ompt_parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001834 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001835 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1836 my_parallel_id, my_task_id);
1837 }
1838#endif
1839
1840 /* OMPT state */
1841 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1842 } else {
1843 exit_runtime_p = &dummy;
1844 }
1845#endif
1846
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001847 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001848 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1849 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001850 __kmp_invoke_microtask( microtask, gtid, 0, argc, args
1851#if OMPT_SUPPORT
1852 , exit_runtime_p
1853#endif
1854 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001855 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001856
1857#if OMPT_SUPPORT
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00001858 *exit_runtime_p = NULL;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001859 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001860#if OMPT_TRACE
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001861 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001862
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001863 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001864 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1865 my_parallel_id, my_task_id);
1866 }
1867#endif
1868
1869 __ompt_lw_taskteam_unlink(master_th);
1870 // clear the task id only after unlinking the task
1871 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1872
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001873 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001874 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00001875 ompt_parallel_id, ompt_task_id,
1876 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001877 }
1878 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1879 }
1880#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001881#if OMP_40_ENABLED
1882 }
1883#endif /* OMP_40_ENABLED */
1884 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001885 else if ( call_context == fork_context_gnu ) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001886#if OMPT_SUPPORT
1887 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
1888 __kmp_allocate(sizeof(ompt_lw_taskteam_t));
1889 __ompt_lw_taskteam_init(lwt, master_th, gtid,
1890 unwrapped_task, ompt_parallel_id);
1891
1892 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001893 lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001894 __ompt_lw_taskteam_link(lwt, master_th);
1895#endif
1896
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001897 // we were called from GNU native code
1898 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
1899 return FALSE;
1900 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001901 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001902 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001903 }
1904
Jim Cownie5e8470a2013-09-27 10:38:44 +00001905
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001906 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907 KMP_MB();
1908 return FALSE;
1909 }
1910
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 // GEH: only modify the executing flag in the case when not serialized
1912 // serialized case is handled in kmpc_serialized_parallel
1913 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001914 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
1915 master_th->th.th_current_task->td_icvs.max_active_levels ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001916 // TODO: GEH - cannot do this assertion because root thread not set up as executing
1917 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1918 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001919
1920#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001921 if ( !master_th->th.th_teams_microtask || level > teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922#endif /* OMP_40_ENABLED */
1923 {
1924 /* Increment our nested depth level */
1925 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
1926 }
1927
Jim Cownie5e8470a2013-09-27 10:38:44 +00001928 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001929 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001930 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
1931 nthreads_icv = __kmp_nested_nth.nth[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932 }
1933 else {
1934 nthreads_icv = 0; // don't update
1935 }
1936
1937#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001939 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001940 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001941 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
1942 proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943 }
1944 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001945 if (proc_bind == proc_bind_default) {
1946 // No proc_bind clause specified; use current proc-bind-var for this parallel region
1947 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001949 /* else: The proc_bind policy was specified explicitly on parallel clause. This
1950 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001951 // Figure the value of proc-bind-var for the child threads.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001952 if ((level+1 < __kmp_nested_proc_bind.used)
1953 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
1954 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 }
1956 }
1957
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001959 master_th->th.th_set_proc_bind = proc_bind_default;
1960#endif /* OMP_40_ENABLED */
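    // Note: the proc_bind resolution above prefers an explicit proc_bind clause
    // over proc-bind-var; __kmp_nested_proc_bind carries the per-level list
    // (for example OMP_PROC_BIND="spread,close") used to seed the children's ICV.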
1961
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001962 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001963#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001964 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965#endif /* OMP_40_ENABLED */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001966 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967 kmp_internal_control_t new_icvs;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001968 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969 new_icvs.next = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970 if (nthreads_icv > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 new_icvs.nproc = nthreads_icv;
1972 }
1973
1974#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001975 if (proc_bind_icv != proc_bind_default) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001976 new_icvs.proc_bind = proc_bind_icv;
1977 }
1978#endif /* OMP_40_ENABLED */
1979
1980 /* allocate a new parallel team */
1981 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1982 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001983#if OMPT_SUPPORT
1984 ompt_parallel_id,
1985#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001986#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001987 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001989 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
1990 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001991 /* allocate a new parallel team */
1992 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
1993 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001994#if OMPT_SUPPORT
1995 ompt_parallel_id,
1996#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001998 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002000 &master_th->th.th_current_task->td_icvs, argc
2001 USE_NESTED_HOT_ARG(master_th) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002 }
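    // Note: the two __kmp_allocate_team calls above differ only in which ICV set
    // seeds the new team: a copied-and-overridden set when nthreads or proc_bind
    // changed for this level, otherwise the master's current ICVs directly.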
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002003 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002004
2005 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002006 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2007 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2008 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2009 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2010 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002011#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002012 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002013#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002014 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015 // TODO: parent_team->t.t_level == INT_MAX ???
2016#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002017 if ( !master_th->th.th_teams_microtask || level > teams_level ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002018#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002019 int new_level = parent_team->t.t_level + 1;
2020 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2021 new_level = parent_team->t.t_active_level + 1;
2022 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002023#if OMP_40_ENABLED
2024 } else {
2025 // AC: Do not increase parallel level at start of the teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002026 int new_level = parent_team->t.t_level;
2027 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2028 new_level = parent_team->t.t_active_level;
2029 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030 }
2031#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002032 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton6b560f02016-07-01 17:54:32 +00002033 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002034 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002035
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002036#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002037 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002038#endif
2039
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002040 // Update the floating point rounding in the team if required.
2041 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042
2043 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002044 // Set master's task team to team's task team. Unless this is a hot team, it should be NULL.
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002045#if 0
2046 // Patch out an assertion that trips while the runtime seems to operate correctly.
2047 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002048 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peytonc96dcb02015-07-23 18:58:37 +00002049#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002050 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002051 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002052 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002053
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002054 if ( active_level || master_th->th.th_task_team ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002055 // Take a memo of master's task_state
2056 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2057 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
Jonathan Peyton54127982015-11-04 21:37:48 +00002058 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2059 kmp_uint8 *old_stack, *new_stack;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002060 kmp_uint32 i;
Jonathan Peyton54127982015-11-04 21:37:48 +00002061 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002062 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2063 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2064 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002065 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
2066 new_stack[i] = 0;
2067 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002068 old_stack = master_th->th.th_task_state_memo_stack;
2069 master_th->th.th_task_state_memo_stack = new_stack;
Jonathan Peyton54127982015-11-04 21:37:48 +00002070 master_th->th.th_task_state_stack_sz = new_size;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002071 __kmp_free(old_stack);
2072 }
2073 // Store master's task_state on stack
2074 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2075 master_th->th.th_task_state_top++;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002076#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00002077 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
Jonathan Peyton54127982015-11-04 21:37:48 +00002078 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2079 }
2080 else {
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002081#endif
Jonathan Peyton54127982015-11-04 21:37:48 +00002082 master_th->th.th_task_state = 0;
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002083#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00002084 }
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00002085#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002086 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002087#if !KMP_NESTED_HOT_TEAMS
2088 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2089#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002090 }
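    // Note: the task_state push above is matched by a pop in __kmp_join_call
    // when this nested region finishes, restoring the master's task-team view
    // for the enclosing level.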
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091
2092 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2093 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2094 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2095 ( team->t.t_master_tid == 0 &&
2096 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2097 KMP_MB();
2098
2099 /* now, setup the arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002100 argv = (void**)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002101#if OMP_40_ENABLED
2102 if ( ap ) {
2103#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002104 for ( i=argc-1; i >= 0; --i ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002105// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002106#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002107 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002108#else
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002109 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002110#endif
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002111 KMP_CHECK_UPDATE(*argv, new_argv);
2112 argv++;
2113 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114#if OMP_40_ENABLED
2115 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002116 for ( i=0; i < argc; ++i ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002117 // Get args from parent team for teams construct
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002118 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2119 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120 }
2121#endif /* OMP_40_ENABLED */
2122
2123 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002124 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002125 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2126 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002127
2128 __kmp_fork_team_threads( root, team, master_th, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002129 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002130
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002131#if OMPT_SUPPORT
2132 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2133#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134
2135 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2136
Jim Cownie5e8470a2013-09-27 10:38:44 +00002137#if USE_ITT_BUILD
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002138 if ( team->t.t_active_level == 1 // only report frames at level 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002139# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002140 && !master_th->th.th_teams_microtask // not in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00002141# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002142 ) {
2143#if USE_ITT_NOTIFY
2144 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2145 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002146 {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002147 kmp_uint64 tmp_time = 0;
2148 if ( __itt_get_timestamp_ptr )
2149 tmp_time = __itt_get_timestamp();
2150 // Internal fork - report frame begin
2151 master_th->th.th_frame_time = tmp_time;
2152 if ( __kmp_forkjoin_frames_mode == 3 )
2153 team->t.t_region_time = tmp_time;
2154 } else // only one notification scheme (either "submit" or "forking/joined", not both)
2155#endif /* USE_ITT_NOTIFY */
2156 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2157 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2158 { // Mark start of "parallel" region for VTune.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002159 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2160 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002161 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002162#endif /* USE_ITT_BUILD */
2163
2164 /* now go on and do the work */
2165 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2166 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002167 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2168 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002169
2170#if USE_ITT_BUILD
2171 if ( __itt_stack_caller_create_ptr ) {
2172 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2173 }
2174#endif /* USE_ITT_BUILD */
2175
2176#if OMP_40_ENABLED
2177 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
2178#endif /* OMP_40_ENABLED */
2179 {
2180 __kmp_internal_fork( loc, gtid, team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002181 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2182 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002183 }
2184
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002185 if (call_context == fork_context_gnu) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002186 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2187 return TRUE;
2188 }
2189
2190 /* Invoke microtask for MASTER thread */
2191 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2192 gtid, team->t.t_id, team->t.t_pkfn ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002193 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002194
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002195 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00002196 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2197 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00002198 // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002199 if (! team->t.t_invoke( gtid )) {
2200 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2201 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002202 }
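    // Note: the master executes its share of the microtask through t_invoke
    // above; workers run theirs from their own launch loop, and all threads
    // meet again at the join barrier inside __kmp_join_call.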
2203 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2204 gtid, team->t.t_id, team->t.t_pkfn ) );
2205 KMP_MB(); /* Flush all pending memory write invalidates. */
2206
2207 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2208
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002209#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002210 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002211 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2212 }
2213#endif
2214
Jim Cownie5e8470a2013-09-27 10:38:44 +00002215 return TRUE;
2216}
2217
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002218#if OMPT_SUPPORT
2219static inline void
2220__kmp_join_restore_state(
2221 kmp_info_t *thread,
2222 kmp_team_t *team)
2223{
2224 // restore state outside the region
2225 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
2226 ompt_state_work_serial : ompt_state_work_parallel);
2227}
2228
2229static inline void
2230__kmp_join_ompt(
2231 kmp_info_t *thread,
2232 kmp_team_t *team,
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002233 ompt_parallel_id_t parallel_id,
2234 fork_context_e fork_context)
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002235{
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002236 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002237 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002238 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002239 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002240 }
2241
Jonas Hahnfeld848d6902016-09-14 13:59:39 +00002242 task_info->frame.reenter_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002243 __kmp_join_restore_state(thread,team);
2244}
2245#endif
2246
Jim Cownie5e8470a2013-09-27 10:38:44 +00002247void
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002248__kmp_join_call(ident_t *loc, int gtid
2249#if OMPT_SUPPORT
2250 , enum fork_context_e fork_context
2251#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002252#if OMP_40_ENABLED
2253 , int exit_teams
2254#endif /* OMP_40_ENABLED */
2255)
2256{
Jonathan Peyton45be4502015-08-11 21:36:41 +00002257 KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002258 kmp_team_t *team;
2259 kmp_team_t *parent_team;
2260 kmp_info_t *master_th;
2261 kmp_root_t *root;
2262 int master_active;
2263 int i;
2264
2265 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2266
2267 /* setup current data */
2268 master_th = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002269 root = master_th->th.th_root;
2270 team = master_th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002271 parent_team = team->t.t_parent;
2272
2273 master_th->th.th_ident = loc;
2274
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002275#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002276 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002277 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2278 }
2279#endif
2280
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002281#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00002282 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2283 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2284 __kmp_gtid_from_thread( master_th ), team,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002285 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
2286 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002287 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002288#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002289
2290 if( team->t.t_serialized ) {
2291#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002292 if ( master_th->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002293 // We are in teams construct
2294 int level = team->t.t_level;
2295 int tlevel = master_th->th.th_teams_level;
2296 if ( level == tlevel ) {
2297 // AC: we haven't incremented it earlier at start of teams construct,
2298 // so do it here - at the end of teams construct
2299 team->t.t_level++;
2300 } else if ( level == tlevel + 1 ) {
2301 // AC: we are exiting parallel inside teams, need to increment serialization
2302 // in order to restore it in the next call to __kmpc_end_serialized_parallel
2303 team->t.t_serialized++;
2304 }
2305 }
2306#endif /* OMP_40_ENABLED */
2307 __kmpc_end_serialized_parallel( loc, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002308
2309#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002310 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002311 __kmp_join_restore_state(master_th, parent_team);
2312 }
2313#endif
2314
Jim Cownie5e8470a2013-09-27 10:38:44 +00002315 return;
2316 }
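    // Note: from here on this is a real join of a forked team: wait at the join
    // barrier, unwind ITT/OMPT bookkeeping, restore the master's view of the
    // parent team, and return the workers via __kmp_free_team.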
2317
2318 master_active = team->t.t_master_active;
2319
2320#if OMP_40_ENABLED
2321 if (!exit_teams)
2322#endif /* OMP_40_ENABLED */
2323 {
2324 // AC: No barrier for internal teams at exit from teams construct.
2325 // But there is a barrier for the external team (league).
2326 __kmp_internal_join( loc, gtid, team );
2327 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002328#if OMP_40_ENABLED
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002329 else {
2330 master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
2331 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002332#endif /* OMP_40_ENABLED */
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002333
Jim Cownie5e8470a2013-09-27 10:38:44 +00002334 KMP_MB();
2335
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002336#if OMPT_SUPPORT
2337 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2338#endif
2339
Jim Cownie5e8470a2013-09-27 10:38:44 +00002340#if USE_ITT_BUILD
2341 if ( __itt_stack_caller_create_ptr ) {
2342 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
2343 }
2344
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002345 // Mark end of "parallel" region for VTune.
2346 if ( team->t.t_active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002347# if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002348 && !master_th->th.th_teams_microtask /* not in teams construct */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002349# endif /* OMP_40_ENABLED */
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002350 ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00002351 master_th->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002352 // only one notification scheme (either "submit" or "forking/joined", not both)
2353 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
2354 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
2355 0, loc, master_th->th.th_team_nproc, 1 );
2356 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
2357 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
2358 __kmp_itt_region_joined( gtid );
2359 } // active_level == 1
Jim Cownie5e8470a2013-09-27 10:38:44 +00002360#endif /* USE_ITT_BUILD */
2361
2362#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002363 if ( master_th->th.th_teams_microtask &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002364 !exit_teams &&
2365 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2366 team->t.t_level == master_th->th.th_teams_level + 1 ) {
2367 // AC: We need to leave the team structure intact at the end
2368 // of parallel inside the teams construct, so that at the next
2369 // parallel same (hot) team works, only adjust nesting levels
2370
2371 /* Decrement our nested depth level */
2372 team->t.t_level --;
2373 team->t.t_active_level --;
2374 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2375
2376 /* Restore number of threads in the team if needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002377 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002378 int old_num = master_th->th.th_team_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002379 int new_num = master_th->th.th_teams_size.nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002380 kmp_info_t **other_threads = team->t.t_threads;
2381 team->t.t_nproc = new_num;
2382 for ( i = 0; i < old_num; ++i ) {
2383 other_threads[i]->th.th_team_nproc = new_num;
2384 }
2385 // Adjust states of non-used threads of the team
2386 for ( i = old_num; i < new_num; ++i ) {
2387 // Re-initialize thread's barrier data.
2388 int b;
2389 kmp_balign_t * balign = other_threads[i]->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002390 for ( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002391 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002392 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00002393#if USE_DEBUGGER
2394 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
2395#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002396 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002397 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2398 // Synchronize thread's task state
2399 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2400 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002401 }
2402 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002403
2404#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002405 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002406 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002407 }
2408#endif
2409
Jim Cownie5e8470a2013-09-27 10:38:44 +00002410 return;
2411 }
2412#endif /* OMP_40_ENABLED */
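    // Note: a parallel that ends while still inside teams (handled just above)
    // keeps the hot team intact and returns early; the code below is the full
    // teardown path that detaches the master from the finished team.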
Jim Cownie5e8470a2013-09-27 10:38:44 +00002413
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002414 /* do cleanup and restore the parent team */
2415 master_th->th.th_info .ds.ds_tid = team->t.t_master_tid;
2416 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2417
2418 master_th->th.th_dispatch =
2419 & parent_team->t.t_dispatch[ team->t.t_master_tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002420
2421 /* jc: The following lock has instructions with REL and ACQ semantics,
2422 separating the parallel user code called in this parallel region
2423 from the serial user code called after this function returns.
2424 */
2425 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2426
2427#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002428 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002429#endif /* OMP_40_ENABLED */
2430 {
2431 /* Decrement our nested depth level */
2432 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
2433 }
2434 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
2435
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002436#if OMPT_SUPPORT && OMPT_TRACE
2437 if(ompt_enabled){
2438 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2439 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2440 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2441 parallel_id, task_info->task_id);
2442 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00002443 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00002444 task_info->task_id = 0;
2445 }
2446#endif
2447
Jim Cownie5e8470a2013-09-27 10:38:44 +00002448 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
2449 0, master_th, team ) );
2450 __kmp_pop_current_task_from_thread( master_th );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002451
Alp Toker98758b02014-03-02 04:12:06 +00002452#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002453 //
2454 // Restore master thread's partition.
2455 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002456 master_th->th.th_first_place = team->t.t_first_place;
2457 master_th->th.th_last_place = team->t.t_last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002458#endif /* OMP_40_ENABLED */
2459
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002460 updateHWFPControl (team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002461
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002462 if ( root->r.r_active != master_active )
2463 root->r.r_active = master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002464
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002465 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00002466
 2467    /* This race was fun to find. Make sure the following is in the critical
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002468     * region; otherwise assertions may occasionally fail since the old team
Jim Cownie5e8470a2013-09-27 10:38:44 +00002469     * may be reallocated and the hierarchy would appear inconsistent. It is
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002470     * actually safe to run outside the lock and won't cause any bugs, but it will
Jim Cownie5e8470a2013-09-27 10:38:44 +00002471     * trigger those assertion failures. It's only one dereference and assignment,
 2472     * so we might as well keep it inside the critical region. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002473 master_th->th.th_team = parent_team;
2474 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2475 master_th->th.th_team_master = parent_team->t.t_threads[0];
2476 master_th->th.th_team_serialized = parent_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002477
2478 /* restore serialized team, if need be */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002479 if( parent_team->t.t_serialized &&
Jim Cownie5e8470a2013-09-27 10:38:44 +00002480 parent_team != master_th->th.th_serial_team &&
2481 parent_team != root->r.r_root_team ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002482 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
2483 master_th->th.th_serial_team = parent_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002484 }
2485
Jim Cownie5e8470a2013-09-27 10:38:44 +00002486 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002487 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
2488 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2489 // Remember master's state if we re-use this nested hot team
2490 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002491 --master_th->th.th_task_state_top; // pop
Jonathan Peyton54127982015-11-04 21:37:48 +00002492 // Now restore state at this level
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002493 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002494 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002495 // Copy the task team from the parent team to the master thread
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002496 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002497 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
Jonathan Peyton54127982015-11-04 21:37:48 +00002498 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002499 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002500
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002501 // TODO: GEH - cannot do this assertion because root thread not set up as executing
2502 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2503 master_th->th.th_current_task->td_flags.executing = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002504
2505 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2506
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002507#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002508 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00002509 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002510 }
2511#endif
2512
Jim Cownie5e8470a2013-09-27 10:38:44 +00002513 KMP_MB();
2514 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
2515}
2516
2517/* ------------------------------------------------------------------------ */
2518/* ------------------------------------------------------------------------ */
2519
2520/* Check whether we should push an internal control record onto the
2521 serial team stack. If so, do it. */
2522void
2523__kmp_save_internal_controls ( kmp_info_t * thread )
2524{
2525
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002526 if ( thread->th.th_team != thread->th.th_serial_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002527 return;
2528 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002529 if (thread->th.th_team->t.t_serialized > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002530 int push = 0;
2531
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002532 if (thread->th.th_team->t.t_control_stack_top == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002533 push = 1;
2534 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002535 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2536 thread->th.th_team->t.t_serialized ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002537 push = 1;
2538 }
2539 }
2540 if (push) { /* push a record on the serial team's stack */
2541 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
2542
Jim Cownie5e8470a2013-09-27 10:38:44 +00002543 copy_icvs( control, & thread->th.th_current_task->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002544
2545 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2546
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002547 control->next = thread->th.th_team->t.t_control_stack_top;
2548 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002549 }
2550 }
2551}
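
/*
 * Illustrative sketch (not part of the runtime; values are indicative only).
 * The stack maintained above matters when user code changes ICVs inside
 * nested *serialized* regions, assuming the usual mapping of the user-level
 * API onto these internals:
 *
 *     #include <omp.h>
 *     void example(void) {
 *         #pragma omp parallel if(0)           // serialized region, level 1
 *         {
 *             #pragma omp parallel if(0)       // nested serialized region:
 *             {                                // t_serialized is now > 1
 *                 omp_set_num_threads(2);      // __kmp_save_internal_controls()
 *             }                                // pushes a record before the ICV
 *         }                                    // is overwritten, so each level
 *     }                                        // unwinds to its own settings
 *
 * Only one record is pushed per serialized nesting level, which is why the
 * code above compares serial_nesting_level against t_serialized before pushing.
 */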
2552
2553/* Changes set_nproc */
2554void
2555__kmp_set_num_threads( int new_nth, int gtid )
2556{
2557 kmp_info_t *thread;
2558 kmp_root_t *root;
2559
2560 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2561 KMP_DEBUG_ASSERT( __kmp_init_serial );
2562
2563 if (new_nth < 1)
2564 new_nth = 1;
2565 else if (new_nth > __kmp_max_nth)
2566 new_nth = __kmp_max_nth;
2567
Jonathan Peyton45be4502015-08-11 21:36:41 +00002568 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002569 thread = __kmp_threads[gtid];
2570
2571 __kmp_save_internal_controls( thread );
2572
2573 set__nproc( thread, new_nth );
2574
2575 //
2576 // If this omp_set_num_threads() call will cause the hot team size to be
2577 // reduced (in the absence of a num_threads clause), then reduce it now,
2578 // rather than waiting for the next parallel region.
2579 //
2580 root = thread->th.th_root;
2581 if ( __kmp_init_parallel && ( ! root->r.r_active )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002582 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2583#if KMP_NESTED_HOT_TEAMS
2584 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2585#endif
2586 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002587 kmp_team_t *hot_team = root->r.r_hot_team;
2588 int f;
2589
2590 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2591
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592 // Release the extra threads we don't need any more.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002593 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2594 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
Jonathan Peyton54127982015-11-04 21:37:48 +00002595 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2596 // When decreasing team size, threads no longer in the team should unref task team.
2597 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2598 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002599 __kmp_free_thread( hot_team->t.t_threads[f] );
2600 hot_team->t.t_threads[f] = NULL;
2601 }
2602 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002603#if KMP_NESTED_HOT_TEAMS
2604 if( thread->th.th_hot_teams ) {
2605 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2606 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2607 }
2608#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002609
Jim Cownie5e8470a2013-09-27 10:38:44 +00002610 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2611
2612 //
2613 // Update the t_nproc field in the threads that are still active.
2614 //
2615 for( f=0 ; f < new_nth; f++ ) {
2616 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2617 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2618 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002619 // Special flag in case omp_set_num_threads() call
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002620 hot_team->t.t_size_changed = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002622}
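
/*
 * Illustrative usage sketch (assuming the user-level omp_set_num_threads()
 * entry point forwards here, as the Fortran/C wrappers do):
 *
 *     #include <omp.h>
 *     int main(void) {
 *         #pragma omp parallel               // hot team grows to, say, 8 threads
 *         { }
 *         omp_set_num_threads(2);            // root is not active, so the idle
 *                                            // hot team is trimmed to 2 threads
 *                                            // right here rather than at the
 *                                            // next parallel region
 *         #pragma omp parallel               // reuses the already-trimmed team
 *         { }
 *         return 0;
 *     }
 */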
2623
Jim Cownie5e8470a2013-09-27 10:38:44 +00002624/* Changes max_active_levels */
2625void
2626__kmp_set_max_active_levels( int gtid, int max_active_levels )
2627{
2628 kmp_info_t *thread;
2629
2630 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2631 KMP_DEBUG_ASSERT( __kmp_init_serial );
2632
2633 // validate max_active_levels
2634 if( max_active_levels < 0 ) {
2635 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2636 // We ignore this call if the user has specified a negative value.
2637 // The current setting won't be changed. The last valid setting will be used.
2638 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
2639 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2640 return;
2641 }
2642 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2643 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2644 // We allow a zero value. (implementation defined behavior)
2645 } else {
2646 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2647 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2648 // Current upper limit is MAX_INT. (implementation defined behavior)
2649 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
 2650        // Actually, the flow should never get here as long as the limit is MAX_INT.
2651 }
2652 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2653
2654 thread = __kmp_threads[ gtid ];
2655
2656 __kmp_save_internal_controls( thread );
2657
2658 set__max_active_levels( thread, max_active_levels );
2659
2660}
2661
2662/* Gets max_active_levels */
2663int
2664__kmp_get_max_active_levels( int gtid )
2665{
2666 kmp_info_t *thread;
2667
2668 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2669 KMP_DEBUG_ASSERT( __kmp_init_serial );
2670
2671 thread = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002672 KMP_DEBUG_ASSERT( thread->th.th_current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002673 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002674 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2675 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002676}
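
/*
 * Illustrative sketch of the clamping behavior above (not part of the runtime;
 * assumes omp_set_max_active_levels()/omp_get_max_active_levels() map onto
 * these two routines):
 *
 *     #include <omp.h>
 *     void show_levels(void) {
 *         omp_set_max_active_levels(-3);   // ignored with a warning; the last
 *                                          // valid setting is kept
 *         omp_set_max_active_levels(0);    // accepted: zero is allowed
 *         omp_set_max_active_levels(4);    // accepted: within [0, limit]
 *         int n = omp_get_max_active_levels();  // reads the per-task ICV;
 *         (void)n;                              // 4 after the calls above
 *     }
 */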
2677
2678/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2679void
2680__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
2681{
2682 kmp_info_t *thread;
2683// kmp_team_t *team;
2684
2685 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2686 KMP_DEBUG_ASSERT( __kmp_init_serial );
2687
2688 // Check if the kind parameter is valid, correct if needed.
2689 // Valid parameters should fit in one of two intervals - standard or extended:
2690 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2691 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2692 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2693 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2694 {
2695 // TODO: Hint needs attention in case we change the default schedule.
2696 __kmp_msg(
2697 kmp_ms_warning,
2698 KMP_MSG( ScheduleKindOutOfRange, kind ),
2699 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2700 __kmp_msg_null
2701 );
2702 kind = kmp_sched_default;
2703 chunk = 0; // ignore chunk value in case of bad kind
2704 }
2705
2706 thread = __kmp_threads[ gtid ];
2707
2708 __kmp_save_internal_controls( thread );
2709
2710 if ( kind < kmp_sched_upper_std ) {
2711 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
 2712            // differentiate static chunked vs. unchunked:
2713 // chunk should be invalid to indicate unchunked schedule (which is the default)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002714 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002715 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002716 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002717 }
2718 } else {
2719 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002720 thread->th.th_current_task->td_icvs.sched.r_sched_type =
Jim Cownie5e8470a2013-09-27 10:38:44 +00002721 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2722 }
2723 if ( kind == kmp_sched_auto ) {
2724 // ignore parameter chunk for schedule auto
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002725 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002726 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002727 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002728 }
2729}
2730
2731/* Gets def_sched_var ICV values */
2732void
2733__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
2734{
2735 kmp_info_t *thread;
2736 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002737
2738 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
2739 KMP_DEBUG_ASSERT( __kmp_init_serial );
2740
2741 thread = __kmp_threads[ gtid ];
2742
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002743 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002744
2745 switch ( th_type ) {
2746 case kmp_sch_static:
2747 case kmp_sch_static_greedy:
2748 case kmp_sch_static_balanced:
2749 *kind = kmp_sched_static;
2750 *chunk = 0; // chunk was not set, try to show this fact via zero value
2751 return;
2752 case kmp_sch_static_chunked:
2753 *kind = kmp_sched_static;
2754 break;
2755 case kmp_sch_dynamic_chunked:
2756 *kind = kmp_sched_dynamic;
2757 break;
2758 case kmp_sch_guided_chunked:
2759 case kmp_sch_guided_iterative_chunked:
2760 case kmp_sch_guided_analytical_chunked:
2761 *kind = kmp_sched_guided;
2762 break;
2763 case kmp_sch_auto:
2764 *kind = kmp_sched_auto;
2765 break;
2766 case kmp_sch_trapezoidal:
2767 *kind = kmp_sched_trapezoidal;
2768 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002769#if KMP_STATIC_STEAL_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002770 case kmp_sch_static_steal:
2771 *kind = kmp_sched_static_steal;
2772 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002773#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002774 default:
2775 KMP_FATAL( UnknownSchedulingType, th_type );
2776 }
2777
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002778 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779}
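
/*
 * Illustrative sketch (not part of the runtime; assumes omp_set_schedule()/
 * omp_get_schedule() land in the two routines above):
 *
 *     #include <omp.h>
 *     void show_schedule(void) {
 *         omp_sched_t kind;
 *         int chunk;
 *
 *         omp_set_schedule(omp_sched_guided, 7);   // stored as a guided
 *                                                  // r_sched_type, chunk 7
 *         omp_get_schedule(&kind, &chunk);         // kind == omp_sched_guided,
 *                                                  // chunk == 7
 *
 *         omp_set_schedule(omp_sched_static, 0);   // chunk < KMP_DEFAULT_CHUNK
 *                                                  // means "static, unchunked"
 *         omp_get_schedule(&kind, &chunk);         // kind == omp_sched_static,
 *                                                  // chunk reads back as 0
 *     }
 */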
2780
2781int
2782__kmp_get_ancestor_thread_num( int gtid, int level ) {
2783
2784 int ii, dd;
2785 kmp_team_t *team;
2786 kmp_info_t *thr;
2787
2788 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2789 KMP_DEBUG_ASSERT( __kmp_init_serial );
2790
2791 // validate level
2792 if( level == 0 ) return 0;
2793 if( level < 0 ) return -1;
2794 thr = __kmp_threads[ gtid ];
2795 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002796 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002797 if( level > ii ) return -1;
2798
2799#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002800 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002801 // AC: we are in teams region where multiple nested teams have same level
2802 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2803 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2804 KMP_DEBUG_ASSERT( ii >= tlevel );
 2805            // AC: Since we have to step over the extra teams-construct levels, artificially increase ii
2806 if ( ii == tlevel ) {
2807 ii += 2; // three teams have same level
2808 } else {
2809 ii ++; // two teams have same level
2810 }
2811 }
2812 }
2813#endif
2814
2815 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2816
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002817 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002818 level++;
2819 while( ii > level )
2820 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002821 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002822 {
2823 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002824 if( ( team->t.t_serialized ) && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002825 team = team->t.t_parent;
2826 continue;
2827 }
2828 if( ii > level ) {
2829 team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002830 dd = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002831 ii--;
2832 }
2833 }
2834
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002835 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002836}
2837
2838int
2839__kmp_get_team_size( int gtid, int level ) {
2840
2841 int ii, dd;
2842 kmp_team_t *team;
2843 kmp_info_t *thr;
2844
2845 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
2846 KMP_DEBUG_ASSERT( __kmp_init_serial );
2847
2848 // validate level
2849 if( level == 0 ) return 1;
2850 if( level < 0 ) return -1;
2851 thr = __kmp_threads[ gtid ];
2852 team = thr->th.th_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002853 ii = team->t.t_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002854 if( level > ii ) return -1;
2855
2856#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002857 if( thr->th.th_teams_microtask ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002858 // AC: we are in teams region where multiple nested teams have same level
2859 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2860 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
2861 KMP_DEBUG_ASSERT( ii >= tlevel );
 2862            // AC: Since we have to step over the extra teams-construct levels, artificially increase ii
2863 if ( ii == tlevel ) {
2864 ii += 2; // three teams have same level
2865 } else {
2866 ii ++; // two teams have same level
2867 }
2868 }
2869 }
2870#endif
2871
2872 while( ii > level )
2873 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002874 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002875 {
2876 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002877 if( team->t.t_serialized && ( !dd ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002878 team = team->t.t_parent;
2879 continue;
2880 }
2881 if( ii > level ) {
2882 team = team->t.t_parent;
2883 ii--;
2884 }
2885 }
2886
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002887 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002888}
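
/*
 * Illustrative sketch (not part of the runtime; assumes the user-level
 * omp_get_ancestor_thread_num()/omp_get_team_size() calls reach the two
 * routines above):
 *
 *     #include <omp.h>
 *     #include <stdio.h>
 *     void show_ancestors(void) {
 *         omp_set_nested(1);
 *         #pragma omp parallel num_threads(2)
 *         #pragma omp parallel num_threads(3)
 *         {
 *             // level 0 is the implicit initial team of size 1
 *             printf("outer tid=%d of %d, inner tid=%d of %d\n",
 *                    omp_get_ancestor_thread_num(1), omp_get_team_size(1),
 *                    omp_get_ancestor_thread_num(2), omp_get_team_size(2));
 *         }
 *     }
 */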
2889
Jim Cownie5e8470a2013-09-27 10:38:44 +00002890kmp_r_sched_t
2891__kmp_get_schedule_global() {
 2892// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
 2893// may be changed by kmp_set_defaults independently, so the updated schedule can be obtained here.
2894
2895 kmp_r_sched_t r_sched;
2896
2897 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
2898 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
2899 // and thus have different run-time schedules in different roots (even in OMP 2.5)
2900 if ( __kmp_sched == kmp_sch_static ) {
2901 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
2902 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
2903 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
2904 } else {
2905 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2906 }
2907
2908 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
2909 r_sched.chunk = KMP_DEFAULT_CHUNK;
2910 } else {
2911 r_sched.chunk = __kmp_chunk;
2912 }
2913
2914 return r_sched;
2915}
2916
2917/* ------------------------------------------------------------------------ */
2918/* ------------------------------------------------------------------------ */
2919
2920
2921/*
 2922 * Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2923 * at least argc number of *t_argv entries for the requested team.
2924 */
2925static void
2926__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
2927{
2928
2929 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002930 if( !realloc || argc > team->t.t_max_argc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002931
2932 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2933 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002934 /* if previously allocated heap space for args, free them */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002935 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2936 __kmp_free( (void *) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002937
2938 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2939 /* use unused space in the cache line for arguments */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002940 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002941 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2942 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002943 team->t.t_argv = &team->t.t_inline_argv[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002944 if ( __kmp_storage_map ) {
2945 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2946 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2947 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2948 "team_%d.t_inline_argv",
2949 team->t.t_id );
2950 }
2951 } else {
2952 /* allocate space for arguments in the heap */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002953 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
Jim Cownie5e8470a2013-09-27 10:38:44 +00002954 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2955 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2956 team->t.t_id, team->t.t_max_argc ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002957 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002958 if ( __kmp_storage_map ) {
2959 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2960 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2961 team->t.t_id );
2962 }
2963 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002964 }
2965}
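
/*
 * Sizing sketch (illustrative, not part of the runtime): for a parallel region
 * with argc outlined arguments,
 *
 *     if (argc <= KMP_INLINE_ARGV_ENTRIES)
 *         ; // arguments go into t_inline_argv, the spare space already inside
 *           // the team structure -- no separate allocation at all
 *     else
 *         ; // a page-allocated buffer of max(KMP_MIN_MALLOC_ARGV_ENTRIES,
 *           // 2 * argc) pointers is used and kept until it is outgrown
 */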
2966
2967static void
2968__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2969{
2970 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002971 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002972 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2973 team->t.t_disp_buffer = (dispatch_shared_info_t*)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002974 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002975 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002976 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002977 team->t.t_max_nproc = max_nth;
2978
2979 /* setup dispatch buffers */
Jonathan Peyton71909c52016-03-02 22:42:06 +00002980 for(i = 0 ; i < num_disp_buff; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002981 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002982#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00002983 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2984#endif
2985 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002986}
2987
2988static void
2989__kmp_free_team_arrays(kmp_team_t *team) {
2990 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
2991 int i;
2992 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2993 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2994 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2995 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2996 }; // if
2997 }; // for
2998 __kmp_free(team->t.t_threads);
Jonathan Peytona58563d2016-03-29 20:05:27 +00002999 __kmp_free(team->t.t_disp_buffer);
3000 __kmp_free(team->t.t_dispatch);
3001 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003002 team->t.t_threads = NULL;
3003 team->t.t_disp_buffer = NULL;
3004 team->t.t_dispatch = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003005 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003006}
3007
3008static void
3009__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3010 kmp_info_t **oldThreads = team->t.t_threads;
3011
Jonathan Peytona58563d2016-03-29 20:05:27 +00003012 __kmp_free(team->t.t_disp_buffer);
3013 __kmp_free(team->t.t_dispatch);
3014 __kmp_free(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003015 __kmp_allocate_team_arrays(team, max_nth);
3016
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003017 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003018
3019 __kmp_free(oldThreads);
3020}
3021
3022static kmp_internal_control_t
3023__kmp_get_global_icvs( void ) {
3024
Jim Cownie5e8470a2013-09-27 10:38:44 +00003025 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003026
3027#if OMP_40_ENABLED
3028 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3029#endif /* OMP_40_ENABLED */
3030
3031 kmp_internal_control_t g_icvs = {
3032 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003033 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3034 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3035 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
Jim Cownie5e8470a2013-09-27 10:38:44 +00003036 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003037#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00003038 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003039#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003040 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
3041 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003042 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3043 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003044#if OMP_40_ENABLED
3045 __kmp_nested_proc_bind.bind_types[0],
George Rokos28f31b42016-09-09 17:55:26 +00003046 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003047#endif /* OMP_40_ENABLED */
3048 NULL //struct kmp_internal_control *next;
3049 };
3050
3051 return g_icvs;
3052}
3053
3054static kmp_internal_control_t
3055__kmp_get_x_global_icvs( const kmp_team_t *team ) {
3056
Jim Cownie5e8470a2013-09-27 10:38:44 +00003057 kmp_internal_control_t gx_icvs;
 3058    gx_icvs.serial_nesting_level = 0; // probably = team->t.t_serialized, as in __kmp_save_internal_controls
3059 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3060 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003061
3062 return gx_icvs;
3063}
3064
3065static void
3066__kmp_initialize_root( kmp_root_t *root )
3067{
3068 int f;
3069 kmp_team_t *root_team;
3070 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003071 int hot_team_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003072 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3073 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003074 KMP_DEBUG_ASSERT( root );
3075 KMP_ASSERT( ! root->r.r_begin );
3076
3077 /* setup the root state structure */
3078 __kmp_init_lock( &root->r.r_begin_lock );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003079 root->r.r_begin = FALSE;
3080 root->r.r_active = FALSE;
3081 root->r.r_in_parallel = 0;
3082 root->r.r_blocktime = __kmp_dflt_blocktime;
3083 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003084
3085 /* setup the root team for this task */
3086 /* allocate the root team structure */
3087 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003088
Jim Cownie5e8470a2013-09-27 10:38:44 +00003089 root_team =
3090 __kmp_allocate_team(
3091 root,
3092 1, // new_nproc
3093 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003094#if OMPT_SUPPORT
3095 0, // root parallel id
3096#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003097#if OMP_40_ENABLED
3098 __kmp_nested_proc_bind.bind_types[0],
3099#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003100 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003101 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003102 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003103 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003104#if USE_DEBUGGER
3105 // Non-NULL value should be assigned to make the debugger display the root team.
3106 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3107#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003108
3109 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3110
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003111 root->r.r_root_team = root_team;
3112 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003113
3114 /* initialize root team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003115 root_team->t.t_threads[0] = NULL;
3116 root_team->t.t_nproc = 1;
3117 root_team->t.t_serialized = 1;
3118 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3119 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3120 root_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003121 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3122 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3123
3124 /* setup the hot team for this task */
3125 /* allocate the hot team structure */
3126 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003127
Jim Cownie5e8470a2013-09-27 10:38:44 +00003128 hot_team =
3129 __kmp_allocate_team(
3130 root,
3131 1, // new_nproc
3132 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003133#if OMPT_SUPPORT
3134 0, // root parallel id
3135#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003136#if OMP_40_ENABLED
3137 __kmp_nested_proc_bind.bind_types[0],
3138#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003139 &r_icvs,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003140 0 // argc
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003141 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
Jim Cownie5e8470a2013-09-27 10:38:44 +00003142 );
3143 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3144
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003145 root->r.r_hot_team = hot_team;
3146 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003147
3148 /* first-time initialization */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003149 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003150
3151 /* initialize hot team */
3152 hot_team_max_nth = hot_team->t.t_max_nproc;
3153 for ( f = 0; f < hot_team_max_nth; ++ f ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003154 hot_team->t.t_threads[ f ] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003155 }; // for
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003156 hot_team->t.t_nproc = 1;
3157 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3158 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3159 hot_team->t.t_sched.chunk = r_sched.chunk;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003160 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003161}
3162
3163#ifdef KMP_DEBUG
3164
3165
3166typedef struct kmp_team_list_item {
3167 kmp_team_p const * entry;
3168 struct kmp_team_list_item * next;
3169} kmp_team_list_item_t;
3170typedef kmp_team_list_item_t * kmp_team_list_t;
3171
3172
3173static void
3174__kmp_print_structure_team_accum( // Add team to list of teams.
3175 kmp_team_list_t list, // List of teams.
3176 kmp_team_p const * team // Team to add.
3177) {
3178
3179 // List must terminate with item where both entry and next are NULL.
3180 // Team is added to the list only once.
3181 // List is sorted in ascending order by team id.
3182 // Team id is *not* a key.
3183
3184 kmp_team_list_t l;
3185
3186 KMP_DEBUG_ASSERT( list != NULL );
3187 if ( team == NULL ) {
3188 return;
3189 }; // if
3190
3191 __kmp_print_structure_team_accum( list, team->t.t_parent );
3192 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3193
3194 // Search list for the team.
3195 l = list;
3196 while ( l->next != NULL && l->entry != team ) {
3197 l = l->next;
3198 }; // while
3199 if ( l->next != NULL ) {
3200 return; // Team has been added before, exit.
3201 }; // if
3202
3203 // Team is not found. Search list again for insertion point.
3204 l = list;
3205 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3206 l = l->next;
3207 }; // while
3208
3209 // Insert team.
3210 {
3211 kmp_team_list_item_t * item =
3212 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3213 * item = * l;
3214 l->entry = team;
3215 l->next = item;
3216 }
3217
3218}
3219
3220static void
3221__kmp_print_structure_team(
3222 char const * title,
3223 kmp_team_p const * team
3224
3225) {
3226 __kmp_printf( "%s", title );
3227 if ( team != NULL ) {
3228 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3229 } else {
3230 __kmp_printf( " - (nil)\n" );
3231 }; // if
3232}
3233
3234static void
3235__kmp_print_structure_thread(
3236 char const * title,
3237 kmp_info_p const * thread
3238
3239) {
3240 __kmp_printf( "%s", title );
3241 if ( thread != NULL ) {
3242 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3243 } else {
3244 __kmp_printf( " - (nil)\n" );
3245 }; // if
3246}
3247
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003248void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003249__kmp_print_structure(
3250 void
3251) {
3252
3253 kmp_team_list_t list;
3254
3255 // Initialize list of teams.
3256 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3257 list->entry = NULL;
3258 list->next = NULL;
3259
3260 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3261 {
3262 int gtid;
3263 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3264 __kmp_printf( "%2d", gtid );
3265 if ( __kmp_threads != NULL ) {
3266 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3267 }; // if
3268 if ( __kmp_root != NULL ) {
3269 __kmp_printf( " %p", __kmp_root[ gtid ] );
3270 }; // if
3271 __kmp_printf( "\n" );
3272 }; // for gtid
3273 }
3274
3275 // Print out __kmp_threads array.
3276 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3277 if ( __kmp_threads != NULL ) {
3278 int gtid;
3279 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3280 kmp_info_t const * thread = __kmp_threads[ gtid ];
3281 if ( thread != NULL ) {
3282 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3283 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3284 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3285 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3286 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3287 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3288 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3289 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3290#if OMP_40_ENABLED
3291 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3292#endif
3293 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3294 __kmp_printf( "\n" );
3295 __kmp_print_structure_team_accum( list, thread->th.th_team );
3296 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3297 }; // if
3298 }; // for gtid
3299 } else {
3300 __kmp_printf( "Threads array is not allocated.\n" );
3301 }; // if
3302
3303 // Print out __kmp_root array.
3304 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3305 if ( __kmp_root != NULL ) {
3306 int gtid;
3307 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3308 kmp_root_t const * root = __kmp_root[ gtid ];
3309 if ( root != NULL ) {
3310 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3311 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3312 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3313 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3314 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3315 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3316 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3317 __kmp_printf( "\n" );
3318 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3319 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3320 }; // if
3321 }; // for gtid
3322 } else {
3323 __kmp_printf( "Ubers array is not allocated.\n" );
3324 }; // if
3325
3326 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3327 while ( list->next != NULL ) {
3328 kmp_team_p const * team = list->entry;
3329 int i;
3330 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3331 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3332 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3333 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3334 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3335 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3336 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3337 __kmp_printf( " Thread %2d: ", i );
3338 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3339 }; // for i
3340 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3341 __kmp_printf( "\n" );
3342 list = list->next;
3343 }; // while
3344
3345 // Print out __kmp_thread_pool and __kmp_team_pool.
3346 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3347 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3348 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3349 __kmp_printf( "\n" );
3350
3351 // Free team list.
3352 while ( list != NULL ) {
3353 kmp_team_list_item_t * item = list;
3354 list = list->next;
3355 KMP_INTERNAL_FREE( item );
3356 }; // while
3357
3358}
3359
3360#endif
3361
3362
3363//---------------------------------------------------------------------------
3364// Stuff for per-thread fast random number generator
3365// Table of primes
3366
3367static const unsigned __kmp_primes[] = {
3368 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3369 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3370 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3371 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3372 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3373 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3374 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3375 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3376 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3377 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3378 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3379 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3380 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3381 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3382 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3383 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3384};
3385
3386//---------------------------------------------------------------------------
3387// __kmp_get_random: Get a random number using a linear congruential method.
3388
3389unsigned short
3390__kmp_get_random( kmp_info_t * thread )
3391{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003392 unsigned x = thread->th.th_x;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003393 unsigned short r = x>>16;
3394
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003395 thread->th.th_x = x*thread->th.th_a+1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003396
3397 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3398 thread->th.th_info.ds.ds_tid, r) );
3399
3400 return r;
3401}
3402//--------------------------------------------------------
3403// __kmp_init_random: Initialize a random number generator
3404
3405void
3406__kmp_init_random( kmp_info_t * thread )
3407{
3408 unsigned seed = thread->th.th_info.ds.ds_tid;
3409
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003410 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3411 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3412 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003413}
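
/*
 * Illustrative sketch of the generator above (standalone; the seed value is
 * arbitrary): each thread runs an independent linear congruential sequence
 * x_{n+1} = a * x_n + 1 (mod 2^32), with the multiplier 'a' picked from
 * __kmp_primes by thread id. Only the high 16 bits are handed out, presumably
 * because the low-order bits of a power-of-two-modulus LCG have short periods.
 *
 *     static unsigned x = 12345u, a = 0x9e3779b1u;       // 'a' is a table entry
 *     static unsigned short next_random(void) {
 *         unsigned short r = (unsigned short)(x >> 16);  // high bits of x
 *         x = x * a + 1;                                 // same recurrence
 *         return r;
 *     }
 */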
3414
3415
3416#if KMP_OS_WINDOWS
3417/* reclaim array entries for root threads that are already dead, returns number reclaimed */
3418static int
3419__kmp_reclaim_dead_roots(void) {
3420 int i, r = 0;
3421
3422 for(i = 0; i < __kmp_threads_capacity; ++i) {
3423 if( KMP_UBER_GTID( i ) &&
3424 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3425 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
3426 r += __kmp_unregister_root_other_thread(i);
3427 }
3428 }
3429 return r;
3430}
3431#endif
3432
3433/*
3434 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
3435 free entries generated.
3436
3437 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
3438 already dead.
3439
 3440 On all platforms, expansion is attempted on the arrays __kmp_threads and __kmp_root, with appropriate
3441 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
3442 __kmp_tp_capacity, if threadprivate cache array has been created.
3443 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3444
3445 After any dead root reclamation, if the clipping value allows array expansion to result in the generation
3446 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
3447 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
3448 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
3449 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
3450 as many free slots as possible up to nWish.
3451
3452 If any argument is negative, the behavior is undefined.
3453*/
3454static int
3455__kmp_expand_threads(int nWish, int nNeed) {
3456 int added = 0;
3457 int old_tp_cached;
3458 int __kmp_actual_max_nth;
3459
3460 if(nNeed > nWish) /* normalize the arguments */
3461 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003462#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00003463/* only for Windows static library */
3464 /* reclaim array entries for root threads that are already dead */
3465 added = __kmp_reclaim_dead_roots();
3466
3467 if(nNeed) {
3468 nNeed -= added;
3469 if(nNeed < 0)
3470 nNeed = 0;
3471 }
3472 if(nWish) {
3473 nWish -= added;
3474 if(nWish < 0)
3475 nWish = 0;
3476 }
3477#endif
3478 if(nWish <= 0)
3479 return added;
3480
3481 while(1) {
3482 int nTarget;
3483 int minimumRequiredCapacity;
3484 int newCapacity;
3485 kmp_info_t **newThreads;
3486 kmp_root_t **newRoot;
3487
3488 //
3489 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
3490 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
3491 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
3492 // become > __kmp_max_nth in one of two ways:
3493 //
3494 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3495 // may not be resused by another thread, so we may need to increase
 3496        //    may not be reused by another thread, so we may need to increase
 3497        //    __kmp_threads_capacity to __kmp_max_nth + 1.
3498 // 2) New foreign root(s) are encountered. We always register new
3499 // foreign roots. This may cause a smaller # of threads to be
3500 // allocated at subsequent parallel regions, but the worker threads
3501 // hang around (and eventually go to sleep) and need slots in the
3502 // __kmp_threads[] array.
3503 //
3504 // Anyway, that is the reason for moving the check to see if
 3505        // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3506 // instead of having it performed here. -BB
3507 //
3508 old_tp_cached = __kmp_tp_cached;
3509 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3510 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3511
3512 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
3513 nTarget = nWish;
3514 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3515 /* can't fulfil nWish, so try nNeed */
3516 if(nNeed) {
3517 nTarget = nNeed;
3518 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3519 /* possible expansion too small -- give up */
3520 break;
3521 }
3522 } else {
3523 /* best-effort */
3524 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3525 if(!nTarget) {
 3526                /* can't expand at all -- give up */
3527 break;
3528 }
3529 }
3530 }
3531 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3532
3533 newCapacity = __kmp_threads_capacity;
3534 do{
3535 newCapacity =
3536 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3537 (newCapacity << 1) :
3538 __kmp_actual_max_nth;
3539 } while(newCapacity < minimumRequiredCapacity);
3540 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3541 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003542 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3543 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003544 memset(newThreads + __kmp_threads_capacity, 0,
3545 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3546 memset(newRoot + __kmp_threads_capacity, 0,
3547 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3548
3549 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3550 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
3551 while we were allocating the expanded array, and our new capacity is larger than the threadprivate
3552 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
3553 of a double-check pair.
3554 */
3555 __kmp_free(newThreads);
3556 continue; /* start over and try again */
3557 }
3558 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3559 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3560 /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
3561 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3562 __kmp_free(newThreads);
3563 continue; /* start over and try again */
3564 } else {
3565 /* success */
 3566            // __kmp_free( __kmp_threads ); // ATT: It leads to a crash. Needs to be investigated.
3567 //
3568 *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
3569 *(kmp_root_t**volatile*)&__kmp_root = newRoot;
3570 added += newCapacity - __kmp_threads_capacity;
3571 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3572 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
Alp Toker8f2d3f02014-02-24 10:40:15 +00003573 break; /* succeeded, so we can exit the loop */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003574 }
3575 }
3576 return added;
3577}
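
/*
 * Capacity-growth sketch (illustrative only): starting from the current
 * capacity, the loop above doubles until the target fits, clipping at the
 * effective maximum (__kmp_tp_capacity once a threadprivate cache exists,
 * __kmp_sys_max_nth otherwise). For example, with capacity 32, 40 additional
 * slots requested, and a maximum of 256:
 *
 *     int newCapacity = 32;
 *     do {
 *         newCapacity = newCapacity <= (256 >> 1) ? newCapacity << 1 : 256;
 *     } while (newCapacity < 32 + 40);               // 32 -> 64 -> 128, stop
 */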
3578
3579/* register the current thread as a root thread and obtain our gtid */
3580/* we must have the __kmp_initz_lock held at this point */
3581/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
3582int
3583__kmp_register_root( int initial_thread )
3584{
3585 kmp_info_t *root_thread;
3586 kmp_root_t *root;
3587 int gtid;
3588 int capacity;
3589 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3590 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
3591 KMP_MB();
3592
3593
3594 /*
3595 2007-03-02:
3596
 3597        If the initial thread did not invoke the OpenMP RTL yet, and this thread is not an initial one,
 3598        the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
 3599        return false (meaning there is at least one empty slot in the __kmp_threads array), but it
 3600        is possible that the only free slot is #0, which is reserved for the initial thread and so
 3601        cannot be used for this one. The following code works around this bug.
 3602
 3603        However, the right solution seems to be not to reserve slot #0 for the initial thread, because:
 3604        (1) there is no magic in slot #0,
 3605        (2) we cannot detect the initial thread reliably (the first thread that performs serial
 3606        initialization may not be a real initial thread).
3607 */
3608 capacity = __kmp_threads_capacity;
3609 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3610 -- capacity;
3611 }; // if
3612
3613 /* see if there are too many threads */
3614 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3615 if ( __kmp_tp_cached ) {
3616 __kmp_msg(
3617 kmp_ms_fatal,
3618 KMP_MSG( CantRegisterNewThread ),
3619 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3620 KMP_HNT( PossibleSystemLimitOnThreads ),
3621 __kmp_msg_null
3622 );
3623 }
3624 else {
3625 __kmp_msg(
3626 kmp_ms_fatal,
3627 KMP_MSG( CantRegisterNewThread ),
3628 KMP_HNT( SystemLimitOnThreads ),
3629 __kmp_msg_null
3630 );
3631 }
3632 }; // if
3633
3634 /* find an available thread slot */
3635 /* Don't reassign the zero slot since we need that to only be used by initial
3636 thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003637 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3638 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003639 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3640 KMP_ASSERT( gtid < __kmp_threads_capacity );
3641
3642 /* update global accounting */
3643 __kmp_all_nth ++;
3644 TCW_4(__kmp_nth, __kmp_nth + 1);
3645
3646 //
3647 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
3648 // for low numbers of procs, and method #2 (keyed API call) for higher
3649 // numbers of procs.
3650 //
3651 if ( __kmp_adjust_gtid_mode ) {
3652 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3653 if ( TCR_4(__kmp_gtid_mode) != 2) {
3654 TCW_4(__kmp_gtid_mode, 2);
3655 }
3656 }
3657 else {
3658 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3659 TCW_4(__kmp_gtid_mode, 1);
3660 }
3661 }
3662 }
3663
3664#ifdef KMP_ADJUST_BLOCKTIME
3665 /* Adjust blocktime to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00003666 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003667 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3668 if ( __kmp_nth > __kmp_avail_proc ) {
3669 __kmp_zero_bt = TRUE;
3670 }
3671 }
3672#endif /* KMP_ADJUST_BLOCKTIME */
3673
3674 /* setup this new hierarchy */
3675 if( ! ( root = __kmp_root[gtid] )) {
3676 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3677 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3678 }
3679
3680 __kmp_initialize_root( root );
3681
3682 /* setup new root thread structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003683 if( root->r.r_uber_thread ) {
3684 root_thread = root->r.r_uber_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003685 } else {
3686 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3687 if ( __kmp_storage_map ) {
3688 __kmp_print_thread_storage_map( root_thread, gtid );
3689 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003690        root_thread->th.th_info.ds.ds_gtid = gtid;
3691 root_thread->th.th_root = root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003692 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003693 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003694 }
3695 #if USE_FAST_MEMORY
3696 __kmp_initialize_fast_memory( root_thread );
3697 #endif /* USE_FAST_MEMORY */
3698
3699 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003700 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003701 __kmp_initialize_bget( root_thread );
3702 #endif
3703 __kmp_init_random( root_thread ); // Initialize random number generator
3704 }
3705
3706 /* setup the serial team held in reserve by the root thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003707 if( ! root_thread->th.th_serial_team ) {
3708 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003709 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003710
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003711 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003712#if OMPT_SUPPORT
3713 0, // root parallel id
3714#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003715#if OMP_40_ENABLED
3716 proc_bind_default,
3717#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003718 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003719 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003720 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003721 KMP_ASSERT( root_thread->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003722 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003723 root_thread->th.th_serial_team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003724
3725 /* drop root_thread into place */
3726 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3727
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003728 root->r.r_root_team->t.t_threads[0] = root_thread;
3729 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3730 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3731 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
3732 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003733
3734 /* initialize the thread, get it ready to go */
3735 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
Jonathan Peytonf2520102016-04-18 21:33:01 +00003736 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003737
3738 /* prepare the master thread for get_gtid() */
3739 __kmp_gtid_set_specific( gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003740
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003741#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003742 __kmp_itt_thread_name( gtid );
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003743#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003744
Jim Cownie5e8470a2013-09-27 10:38:44 +00003745 #ifdef KMP_TDATA_GTID
3746 __kmp_gtid = gtid;
3747 #endif
3748 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3749 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003750
3751 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3752 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003753 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003754 KMP_INIT_BARRIER_STATE ) );
3755 { // Initialize barrier data.
3756 int b;
3757 for ( b = 0; b < bs_last_barrier; ++ b ) {
3758 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003759#if USE_DEBUGGER
3760 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3761#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003762 }; // for
3763 }
3764 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3765
Alp Toker763b9392014-02-28 09:42:41 +00003766#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003767# if OMP_40_ENABLED
3768 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3769 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3770 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3771 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3772# endif
3773
Jim Cownie5e8470a2013-09-27 10:38:44 +00003774 if ( TCR_4(__kmp_init_middle) ) {
3775 __kmp_affinity_set_init_mask( gtid, TRUE );
3776 }
Alp Toker763b9392014-02-28 09:42:41 +00003777#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003778
3779 __kmp_root_counter ++;
3780
3781 KMP_MB();
3782 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3783
3784 return gtid;
3785}
3786
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003787#if KMP_NESTED_HOT_TEAMS
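// Recursively frees the nested hot teams kept by thread 'thr' at nesting depth
// 'level' and below: the workers' deeper hot teams (and their kmp_hot_team_ptr_t
// arrays) are released first, then the hot team at this level is handed back via
// __kmp_free_team(). Returns the number of __kmp_threads entries released; the
// master of the team at this level is not counted, since it stays with the
// parent team.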
3788static int
3789__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3790{
3791 int i, n, nth;
3792 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3793 if( !hot_teams || !hot_teams[level].hot_team ) {
3794 return 0;
3795 }
3796 KMP_DEBUG_ASSERT( level < max_level );
3797 kmp_team_t *team = hot_teams[level].hot_team;
3798 nth = hot_teams[level].hot_team_nth;
3799 n = nth - 1; // master is not freed
3800 if( level < max_level - 1 ) {
3801 for( i = 0; i < nth; ++i ) {
3802 kmp_info_t *th = team->t.t_threads[i];
3803 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3804 if( i > 0 && th->th.th_hot_teams ) {
3805 __kmp_free( th->th.th_hot_teams );
3806 th->th.th_hot_teams = NULL;
3807 }
3808 }
3809 }
3810 __kmp_free_team( root, team, NULL );
3811 return n;
3812}
3813#endif
3814
Jim Cownie5e8470a2013-09-27 10:38:44 +00003815/* Resets a root thread and clears its root and hot teams.
3816 Returns the number of __kmp_threads entries directly and indirectly freed.
3817*/
3818static int
3819__kmp_reset_root(int gtid, kmp_root_t *root)
3820{
3821 kmp_team_t * root_team = root->r.r_root_team;
3822 kmp_team_t * hot_team = root->r.r_hot_team;
3823 int n = hot_team->t.t_nproc;
3824 int i;
3825
3826 KMP_DEBUG_ASSERT( ! root->r.r_active );
3827
3828 root->r.r_root_team = NULL;
3829 root->r.r_hot_team = NULL;
3830 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call
3831 // to __kmp_free_team().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003832 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3833#if KMP_NESTED_HOT_TEAMS
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003834 if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003835 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3836 kmp_info_t *th = hot_team->t.t_threads[i];
Andrey Churbanov2eca95c2016-07-08 14:53:24 +00003837 if( __kmp_hot_teams_max_level > 1 ) {
3838 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3839 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003840 if( th->th.th_hot_teams ) {
3841 __kmp_free( th->th.th_hot_teams );
3842 th->th.th_hot_teams = NULL;
3843 }
3844 }
3845 }
3846#endif
3847 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003848
Jim Cownie5e8470a2013-09-27 10:38:44 +00003849 //
3850 // Before we can reap the thread, we need to make certain that all
3851 // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
3852 //
3853 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3854 __kmp_wait_to_unref_task_teams();
3855 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003856
3857 #if KMP_OS_WINDOWS
3858 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3859 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3860 (LPVOID)&(root->r.r_uber_thread->th),
3861 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3862 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3863 #endif /* KMP_OS_WINDOWS */
3864
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003865#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00003866 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003867 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3868 int gtid = __kmp_get_gtid();
3869 __ompt_thread_end(ompt_thread_initial, gtid);
3870 }
3871#endif
3872
Jim Cownie5e8470a2013-09-27 10:38:44 +00003873 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3874 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3875
3876 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
3877 root->r.r_uber_thread = NULL;
3878 /* mark root as no longer in use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003879 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003880
3881 return n;
3882}
3883
3884void
3885__kmp_unregister_root_current_thread( int gtid )
3886{
Jim Cownie77c2a632014-09-03 11:34:33 +00003887 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003888 /* this lock should be ok, since unregister_root_current_thread is never called during
3889 * an abort, only during a normal close. furthermore, if you have the
3890 * forkjoin lock, you should never try to get the initz lock */
Jim Cownie77c2a632014-09-03 11:34:33 +00003891
3892 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3893 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3894 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3895 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3896 return;
3897 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003898 kmp_root_t *root = __kmp_root[gtid];
3899
Jim Cownie5e8470a2013-09-27 10:38:44 +00003900 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3901 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3902 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3903 KMP_ASSERT( root->r.r_active == FALSE );
3904
Jim Cownie5e8470a2013-09-27 10:38:44 +00003905
3906 KMP_MB();
3907
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003908#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003909 kmp_info_t * thread = __kmp_threads[gtid];
3910 kmp_team_t * team = thread->th.th_team;
3911 kmp_task_team_t * task_team = thread->th.th_task_team;
3912
3913 // we need to wait for the proxy tasks before finishing the thread
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003914 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3915#if OMPT_SUPPORT
3916 // the runtime is shutting down so we won't report any events
3917 thread->th.ompt_thread_info.state = ompt_state_undefined;
3918#endif
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003919 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003920 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003921#endif
3922
Jim Cownie5e8470a2013-09-27 10:38:44 +00003923 __kmp_reset_root(gtid, root);
3924
3925 /* free up this thread slot */
3926 __kmp_gtid_set_specific( KMP_GTID_DNE );
3927#ifdef KMP_TDATA_GTID
3928 __kmp_gtid = KMP_GTID_DNE;
3929#endif
3930
3931 KMP_MB();
3932 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3933
3934 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3935}
3936
Jonathan Peyton2321d572015-06-08 19:25:25 +00003937#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003938/* __kmp_forkjoin_lock must be already held
3939 Unregisters a root thread that is not the current thread. Returns the number of
3940 __kmp_threads entries freed as a result.
3941 */
3942static int
3943__kmp_unregister_root_other_thread( int gtid )
3944{
3945 kmp_root_t *root = __kmp_root[gtid];
3946 int r;
3947
3948 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3949 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3950 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3951 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3952 KMP_ASSERT( root->r.r_active == FALSE );
3953
3954 r = __kmp_reset_root(gtid, root);
3955 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3956 return r;
3957}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003958#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003959
Jim Cownie5e8470a2013-09-27 10:38:44 +00003960#if KMP_DEBUG
3961void __kmp_task_info() {
3962
3963 kmp_int32 gtid = __kmp_entry_gtid();
3964 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3965 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003966 kmp_team_t *steam = this_thr->th.th_serial_team;
3967 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003968
3969 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3970 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3971}
3972#endif // KMP_DEBUG
3973
Jim Cownie5e8470a2013-09-27 10:38:44 +00003974/* TODO optimize with one big memclr, take out what isn't needed,
Andrey Churbanov6d224db2015-02-10 18:37:43 +00003975 * split responsibility to workers as much as possible, and delay
Jim Cownie5e8470a2013-09-27 10:38:44 +00003976 * initialization of features as much as possible */
3977static void
3978__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3979{
3980 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
3981 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003982 kmp_info_t *master = team->t.t_threads[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00003983 KMP_DEBUG_ASSERT( this_thr != NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003984 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003985 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003986 KMP_DEBUG_ASSERT( team->t.t_threads );
3987 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3988 KMP_DEBUG_ASSERT( master );
3989 KMP_DEBUG_ASSERT( master->th.th_root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003990
3991 KMP_MB();
3992
3993 TCW_SYNC_PTR(this_thr->th.th_team, team);
3994
3995 this_thr->th.th_info.ds.ds_tid = tid;
3996 this_thr->th.th_set_nproc = 0;
3997#if OMP_40_ENABLED
3998 this_thr->th.th_set_proc_bind = proc_bind_default;
Alp Toker98758b02014-03-02 04:12:06 +00003999# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004000 this_thr->th.th_new_place = this_thr->th.th_current_place;
4001# endif
4002#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004003 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004004
4005 /* setup the thread's cache of the team structure */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004006 this_thr->th.th_team_nproc = team->t.t_nproc;
4007 this_thr->th.th_team_master = master;
4008 this_thr->th.th_team_serialized = team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004009 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4010
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004011 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004012
4013 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4014 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4015
4016 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4017
4018 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4019 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4020 // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004021
4022 /* TODO no worksharing in speculative threads */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004023 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004024
4025 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004026
4027#ifdef BUILD_TV
4028 this_thr->th.th_local.tv_data = 0;
4029#endif
4030
4031 if ( ! this_thr->th.th_pri_common ) {
4032 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4033 if ( __kmp_storage_map ) {
4034 __kmp_print_storage_map_gtid(
4035 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4036 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4037 );
4038 }; // if
4039 this_thr->th.th_pri_head = NULL;
4040 }; // if
4041
4042 /* Initialize dynamic dispatch */
4043 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004044 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004045 /*
4046 * Use team max_nproc since this will never change for the team.
4047 */
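        /* A serialized team (t_max_nproc == 1) needs only one dispatch buffer;
           otherwise __kmp_dispatch_num_buffers buffers are allocated so that
           several consecutive dynamically scheduled worksharing constructs can
           be in flight at once without overwriting each other's state. */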
4048 size_t disp_size = sizeof( dispatch_private_info_t ) *
Jonathan Peyton067325f2016-05-31 19:01:15 +00004049 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004050 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4051 KMP_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004052 KMP_DEBUG_ASSERT( team->t.t_dispatch );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004053 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4054
4055 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004056#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00004057 dispatch->th_doacross_buf_idx = 0;
4058#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004059 if( ! dispatch->th_disp_buffer ) {
4060 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004061
4062 if ( __kmp_storage_map ) {
4063 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
Jonathan Peyton067325f2016-05-31 19:01:15 +00004064 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
Jim Cownie5e8470a2013-09-27 10:38:44 +00004065 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4066 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4067 gtid, team->t.t_id, gtid );
4068 }
4069 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004070 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004071 }
4072
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004073 dispatch->th_dispatch_pr_current = 0;
4074 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004075
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004076 dispatch->th_deo_fcn = 0; /* ORDERED */
4077 dispatch->th_dxo_fcn = 0; /* END ORDERED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004078 }
4079
4080 this_thr->th.th_next_pool = NULL;
4081
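    /* The th_task_state memo stack saves th_task_state when this thread enters a
       nested parallel region so the value can be restored afterwards; it starts
       with 4 entries and is grown on demand. */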
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004082 if (!this_thr->th.th_task_state_memo_stack) {
Jonathan Peyton54127982015-11-04 21:37:48 +00004083 size_t i;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004084 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4085 this_thr->th.th_task_state_top = 0;
4086 this_thr->th.th_task_state_stack_sz = 4;
Jonathan Peyton54127982015-11-04 21:37:48 +00004087 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
4088 this_thr->th.th_task_state_memo_stack[i] = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004089 }
4090
Jim Cownie5e8470a2013-09-27 10:38:44 +00004091 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4092 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4093
4094 KMP_MB();
4095}
4096
4097
4098/* allocate a new thread for the requesting team. this is only called from within a
4099 * forkjoin critical section. we will first try to get an available thread from the
4100 * thread pool. if none is available, we will fork a new one, assuming we are
4101 * able to create one; this should be assured, as the caller should check on this
4102 * first.
4103 */
4104kmp_info_t *
4105__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4106{
4107 kmp_team_t *serial_team;
4108 kmp_info_t *new_thr;
4109 int new_gtid;
4110
4111 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4112 KMP_DEBUG_ASSERT( root && team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004113#if !KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004114 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004115#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004116 KMP_MB();
4117
4118 /* first, try to get one from the thread pool */
4119 if ( __kmp_thread_pool ) {
4120
4121 new_thr = (kmp_info_t*)__kmp_thread_pool;
4122 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4123 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4124 __kmp_thread_pool_insert_pt = NULL;
4125 }
4126 TCW_4(new_thr->th.th_in_pool, FALSE);
4127 //
4128 // Don't touch th_active_in_pool or th_active.
4129 // The worker thread adjusts those flags as it sleeps/awakens.
4130 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00004131 __kmp_thread_pool_nth--;
4132
4133 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4134 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004135 KMP_ASSERT( ! new_thr->th.th_team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004136 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4137 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4138
4139 /* setup the thread structure */
4140 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4141 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4142
4143 TCW_4(__kmp_nth, __kmp_nth + 1);
4144
Jonathan Peyton54127982015-11-04 21:37:48 +00004145 new_thr->th.th_task_state = 0;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004146 new_thr->th.th_task_state_top = 0;
4147 new_thr->th.th_task_state_stack_sz = 4;
4148
Jim Cownie5e8470a2013-09-27 10:38:44 +00004149#ifdef KMP_ADJUST_BLOCKTIME
4150 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004151 /* Middle initialization might not have occurred yet */
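    /* If the process has become oversubscribed (more runtime threads than
       available procs) and KMP_BLOCKTIME was not set explicitly, force the
       blocktime to zero so idle threads sleep instead of spin-waiting. */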
Jim Cownie5e8470a2013-09-27 10:38:44 +00004152 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4153 if ( __kmp_nth > __kmp_avail_proc ) {
4154 __kmp_zero_bt = TRUE;
4155 }
4156 }
4157#endif /* KMP_ADJUST_BLOCKTIME */
4158
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004159#if KMP_DEBUG
4160 // If the thread entered the pool via __kmp_free_thread, wait_flag should not be KMP_BARRIER_PARENT_FLAG.
4161 int b;
4162 kmp_balign_t * balign = new_thr->th.th_bar;
4163 for( b = 0; b < bs_last_barrier; ++ b )
4164 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4165#endif
4166
Jim Cownie5e8470a2013-09-27 10:38:44 +00004167 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4168 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4169
4170 KMP_MB();
4171 return new_thr;
4172 }
4173
4174
4175 /* no, we'll fork a new one */
4176 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4177 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4178
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004179#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00004180 //
4181 // If this is the first worker thread the RTL is creating, then also
4182 // launch the monitor thread. We try to do this as early as possible.
4183 //
4184 if ( ! TCR_4( __kmp_init_monitor ) ) {
4185 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4186 if ( ! TCR_4( __kmp_init_monitor ) ) {
4187 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4188 TCW_4( __kmp_init_monitor, 1 );
4189 __kmp_create_monitor( & __kmp_monitor );
4190 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004191 #if KMP_OS_WINDOWS
4192 // AC: wait until monitor has started. This is a fix for CQ232808.
4193 // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
4194 // work in between, then there is a high probability that the monitor thread starts after
4195 // the library has shut down. At shutdown it is too late to cope with the problem, because
4196 // when the master is in DllMain (process detach) the monitor has no chance to start
4197 // (it is blocked), and the master has no means to inform the monitor that the library has gone,
4198 // because all the memory which the monitor can access is going to be released/reset.
4199 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4200 KMP_YIELD( TRUE );
4201 }
4202 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4203 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004204 }
4205 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4206 }
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00004207#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004208
4209 KMP_MB();
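    /* find the lowest free gtid slot for the new worker; the search starts at 1,
       so a worker never takes slot 0 (the initial root thread) */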
4210 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4211 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4212 }
4213
4214 /* allocate space for it. */
4215 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4216
4217 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4218
4219 if ( __kmp_storage_map ) {
4220 __kmp_print_thread_storage_map( new_thr, new_gtid );
4221 }
4222
4223 /* add the reserve serialized team, initialized from the team's master thread */
4224 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004225 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004226 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004227
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004228 new_thr->th.th_serial_team = serial_team =
Jim Cownie5e8470a2013-09-27 10:38:44 +00004229 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004230#if OMPT_SUPPORT
4231 0, // root parallel id
4232#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004233#if OMP_40_ENABLED
4234 proc_bind_default,
4235#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004236 &r_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004237 0 USE_NESTED_HOT_ARG(NULL) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004238 }
4239 KMP_ASSERT ( serial_team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004240 serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
4241 serial_team->t.t_threads[0] = new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004242 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4243 new_thr ) );
4244
4245 /* setup the thread structures */
4246 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4247
4248 #if USE_FAST_MEMORY
4249 __kmp_initialize_fast_memory( new_thr );
4250 #endif /* USE_FAST_MEMORY */
4251
4252 #if KMP_USE_BGET
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004253 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004254 __kmp_initialize_bget( new_thr );
4255 #endif
4256
4257 __kmp_init_random( new_thr ); // Initialize random number generator
4258
4259 /* Initialize these only once when thread is grabbed for a team allocation */
4260 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4261 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4262
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004263 int b;
4264 kmp_balign_t * balign = new_thr->th.th_bar;
4265 for(b=0; b<bs_last_barrier; ++b) {
4266 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4267 balign[b].bb.team = NULL;
4268 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4269 balign[b].bb.use_oncore_barrier = 0;
4270 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004271
4272 new_thr->th.th_spin_here = FALSE;
4273 new_thr->th.th_next_waiting = 0;
4274
Alp Toker98758b02014-03-02 04:12:06 +00004275#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004276 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4277 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4278 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4279 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4280#endif
4281
4282 TCW_4(new_thr->th.th_in_pool, FALSE);
4283 new_thr->th.th_active_in_pool = FALSE;
4284 TCW_4(new_thr->th.th_active, TRUE);
4285
4286 /* adjust the global counters */
4287 __kmp_all_nth ++;
4288 __kmp_nth ++;
4289
4290 //
4291 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4292 // for low numbers of procs, and method #2 (keyed API call) for higher
4293 // numbers of procs.
4294 //
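    // The threshold is __kmp_tls_gtid_min: at or above it the keyed TLS lookup
    // (method #2) is selected; below it the cheaper stack-pointer search
    // (method #1) is used.
    //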
4295 if ( __kmp_adjust_gtid_mode ) {
4296 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4297 if ( TCR_4(__kmp_gtid_mode) != 2) {
4298 TCW_4(__kmp_gtid_mode, 2);
4299 }
4300 }
4301 else {
4302 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4303 TCW_4(__kmp_gtid_mode, 1);
4304 }
4305 }
4306 }
4307
4308#ifdef KMP_ADJUST_BLOCKTIME
4309 /* Adjust blocktime back to zero if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00004310 /* Middle initialization might not have occurred yet */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004311 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4312 if ( __kmp_nth > __kmp_avail_proc ) {
4313 __kmp_zero_bt = TRUE;
4314 }
4315 }
4316#endif /* KMP_ADJUST_BLOCKTIME */
4317
4318 /* actually fork it and create the new worker thread */
4319 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4320 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4321 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4322
Jim Cownie5e8470a2013-09-27 10:38:44 +00004323 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4324 KMP_MB();
4325 return new_thr;
4326}
4327
4328/*
4329 * reinitialize team for reuse.
4330 *
4331 * The hot team code calls this routine at every fork barrier, so the EPCC barrier
4332 * tests are extremely sensitive to changes in it, esp. writes to the team
4333 * struct, which cause a cache invalidation in all threads.
4334 *
4335 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
4336 */
4337static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004338__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004339 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4340 team->t.t_threads[0], team ) );
Jim Cownie181b4bb2013-12-23 17:28:57 +00004341 KMP_DEBUG_ASSERT( team && new_icvs);
4342 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004343 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004344
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004345 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jim Cownie5e8470a2013-09-27 10:38:44 +00004346
Jim Cownie181b4bb2013-12-23 17:28:57 +00004347 // Copy ICVs to the master thread's implicit taskdata
Jim Cownie181b4bb2013-12-23 17:28:57 +00004348 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004349 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004350
4351 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4352 team->t.t_threads[0], team ) );
4353}
4354
Jim Cownie5e8470a2013-09-27 10:38:44 +00004355
4356/* initialize the team data structure
4357 * this assumes the t_threads and t_max_nproc are already set
4358 * also, we don't touch the arguments */
4359static void
4360__kmp_initialize_team(
4361 kmp_team_t * team,
4362 int new_nproc,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004363 kmp_internal_control_t * new_icvs,
4364 ident_t * loc
Jim Cownie5e8470a2013-09-27 10:38:44 +00004365) {
Jim Cownie181b4bb2013-12-23 17:28:57 +00004366 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4367
Jim Cownie5e8470a2013-09-27 10:38:44 +00004368 /* verify */
4369 KMP_DEBUG_ASSERT( team );
4370 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4371 KMP_DEBUG_ASSERT( team->t.t_threads );
4372 KMP_MB();
4373
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004374 team->t.t_master_tid = 0; /* not needed */
4375 /* team->t.t_master_bar; not needed */
4376 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4377 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004378
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004379 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4380 team->t.t_next_pool = NULL;
4381 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004382
4383 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004384 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004385
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004386 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4387 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004388
4389#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004390 team->t.t_fp_control_saved = FALSE; /* not needed */
4391 team->t.t_x87_fpu_control_word = 0; /* not needed */
4392 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004393#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4394
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004395 team->t.t_construct = 0;
4396 __kmp_init_lock( & team->t.t_single_lock );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004397
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004398 team->t.t_ordered .dt.t_value = 0;
4399 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004400
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004401 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004402
4403#ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004404 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004405#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004406 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004407
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004408 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004409
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004410 __kmp_reinitialize_team( team, new_icvs, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004411
4412 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004413 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004414}
4415
Alp Toker98758b02014-03-02 04:12:06 +00004416#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004417/* Sets the full affinity mask for the thread and saves the previous mask into *old_mask (if non-NULL); no changes to runtime structures. */
4418static void
4419__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4420{
4421 if ( KMP_AFFINITY_CAPABLE() ) {
4422 int status;
4423 if ( old_mask != NULL ) {
4424 status = __kmp_get_system_affinity( old_mask, TRUE );
4425 int error = errno;
4426 if ( status != 0 ) {
4427 __kmp_msg(
4428 kmp_ms_fatal,
4429 KMP_MSG( ChangeThreadAffMaskError ),
4430 KMP_ERR( error ),
4431 __kmp_msg_null
4432 );
4433 }
4434 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004435 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004436 }
4437}
4438#endif
4439
Alp Toker98758b02014-03-02 04:12:06 +00004440#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004441
4442//
4443// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4444// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004445// thread's partition, and binds each worker to a place in its partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004446// The master thread's partition should already include its current binding.
4447//
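// For example, with proc_bind_close, 7 threads and a 3-place partition that
// starts at the master's place: S = 7/3 = 2, rem = 1, gap = 3/1 = 3, so the
// places receive 3, 2 and 2 threads, with the remainder absorbed by the first
// place visited. With proc_bind_spread and n_th <= n_places, each thread
// instead receives its own sub-partition of roughly n_places/n_th places.
//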
4448static void
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004449__kmp_partition_places( kmp_team_t *team, int update_master_only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004450{
4451 //
4452 // Copy the master thread's place partition to the team struct
4453 //
4454 kmp_info_t *master_th = team->t.t_threads[0];
4455 KMP_DEBUG_ASSERT( master_th != NULL );
4456 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4457 int first_place = master_th->th.th_first_place;
4458 int last_place = master_th->th.th_last_place;
4459 int masters_place = master_th->th.th_current_place;
4460 team->t.t_first_place = first_place;
4461 team->t.t_last_place = last_place;
4462
4463 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4464 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4465 masters_place, first_place, last_place ) );
4466
4467 switch ( proc_bind ) {
4468
4469 case proc_bind_default:
4470 //
4471 // serial teams might have the proc_bind policy set to
4472 // proc_bind_default. It doesn't matter, as we don't
4473 // rebind the master thread for any proc_bind policy.
4474 //
4475 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4476 break;
4477
4478 case proc_bind_master:
4479 {
4480 int f;
4481 int n_th = team->t.t_nproc;
4482 for ( f = 1; f < n_th; f++ ) {
4483 kmp_info_t *th = team->t.t_threads[f];
4484 KMP_DEBUG_ASSERT( th != NULL );
4485 th->th.th_first_place = first_place;
4486 th->th.th_last_place = last_place;
4487 th->th.th_new_place = masters_place;
4488
4489 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4490 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4491 team->t.t_id, f, masters_place, first_place, last_place ) );
4492 }
4493 }
4494 break;
4495
4496 case proc_bind_close:
4497 {
4498 int f;
4499 int n_th = team->t.t_nproc;
4500 int n_places;
4501 if ( first_place <= last_place ) {
4502 n_places = last_place - first_place + 1;
4503 }
4504 else {
4505 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4506 }
4507 if ( n_th <= n_places ) {
4508 int place = masters_place;
4509 for ( f = 1; f < n_th; f++ ) {
4510 kmp_info_t *th = team->t.t_threads[f];
4511 KMP_DEBUG_ASSERT( th != NULL );
4512
4513 if ( place == last_place ) {
4514 place = first_place;
4515 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004516 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004517 place = 0;
4518 }
4519 else {
4520 place++;
4521 }
4522 th->th.th_first_place = first_place;
4523 th->th.th_last_place = last_place;
4524 th->th.th_new_place = place;
4525
4526 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4527 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4528 team->t.t_id, f, place, first_place, last_place ) );
4529 }
4530 }
4531 else {
4532 int S, rem, gap, s_count;
4533 S = n_th / n_places;
4534 s_count = 0;
4535 rem = n_th - ( S * n_places );
4536 gap = rem > 0 ? n_places/rem : n_places;
4537 int place = masters_place;
4538 int gap_ct = gap;
4539 for ( f = 0; f < n_th; f++ ) {
4540 kmp_info_t *th = team->t.t_threads[f];
4541 KMP_DEBUG_ASSERT( th != NULL );
4542
4543 th->th.th_first_place = first_place;
4544 th->th.th_last_place = last_place;
4545 th->th.th_new_place = place;
4546 s_count++;
4547
4548 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4549 // do nothing; the extra thread will be added to this place on the next iteration
4550 }
4551 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4552 // we added an extra thread to this place; move to next place
4553 if ( place == last_place ) {
4554 place = first_place;
4555 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004556 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004557 place = 0;
4558 }
4559 else {
4560 place++;
4561 }
4562 s_count = 0;
4563 gap_ct = 1;
4564 rem--;
4565 }
4566 else if (s_count == S) { // place full; don't add extra
4567 if ( place == last_place ) {
4568 place = first_place;
4569 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004570 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004571 place = 0;
4572 }
4573 else {
4574 place++;
4575 }
4576 gap_ct++;
4577 s_count = 0;
4578 }
4579
4580 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4581 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4582 team->t.t_id, f, th->th.th_new_place, first_place,
4583 last_place ) );
4584 }
4585 KMP_DEBUG_ASSERT( place == masters_place );
4586 }
4587 }
4588 break;
4589
4590 case proc_bind_spread:
4591 {
4592 int f;
4593 int n_th = team->t.t_nproc;
4594 int n_places;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004595 int thidx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004596 if ( first_place <= last_place ) {
4597 n_places = last_place - first_place + 1;
4598 }
4599 else {
4600 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4601 }
4602 if ( n_th <= n_places ) {
4603 int place = masters_place;
4604 int S = n_places/n_th;
4605 int s_count, rem, gap, gap_ct;
4606 rem = n_places - n_th*S;
4607 gap = rem ? n_th/rem : 1;
4608 gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004609 thidx = n_th;
4610 if (update_master_only == 1)
4611 thidx = 1;
4612 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004613 kmp_info_t *th = team->t.t_threads[f];
4614 KMP_DEBUG_ASSERT( th != NULL );
4615
4616 th->th.th_first_place = place;
4617 th->th.th_new_place = place;
4618 s_count = 1;
4619 while (s_count < S) {
4620 if ( place == last_place ) {
4621 place = first_place;
4622 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004623 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004624 place = 0;
4625 }
4626 else {
4627 place++;
4628 }
4629 s_count++;
4630 }
4631 if (rem && (gap_ct == gap)) {
4632 if ( place == last_place ) {
4633 place = first_place;
4634 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004635 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004636 place = 0;
4637 }
4638 else {
4639 place++;
4640 }
4641 rem--;
4642 gap_ct = 0;
4643 }
4644 th->th.th_last_place = place;
4645 gap_ct++;
4646
4647 if ( place == last_place ) {
4648 place = first_place;
4649 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004650 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004651 place = 0;
4652 }
4653 else {
4654 place++;
4655 }
4656
4657 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4658 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4659 team->t.t_id, f, th->th.th_new_place,
4660 th->th.th_first_place, th->th.th_last_place ) );
4661 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004662 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004663 }
4664 else {
4665 int S, rem, gap, s_count;
4666 S = n_th / n_places;
4667 s_count = 0;
4668 rem = n_th - ( S * n_places );
4669 gap = rem > 0 ? n_places/rem : n_places;
4670 int place = masters_place;
4671 int gap_ct = gap;
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004672 thidx = n_th;
4673 if (update_master_only == 1)
4674 thidx = 1;
4675 for ( f = 0; f < thidx; f++ ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004676 kmp_info_t *th = team->t.t_threads[f];
4677 KMP_DEBUG_ASSERT( th != NULL );
4678
4679 th->th.th_first_place = place;
4680 th->th.th_last_place = place;
4681 th->th.th_new_place = place;
4682 s_count++;
4683
4684 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4685 // do nothing; the extra thread will be added to this place on the next iteration
4686 }
4687 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4688 // we added an extra thread to this place; move on to next place
4689 if ( place == last_place ) {
4690 place = first_place;
4691 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004692 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004693 place = 0;
4694 }
4695 else {
4696 place++;
4697 }
4698 s_count = 0;
4699 gap_ct = 1;
4700 rem--;
4701 }
4702 else if (s_count == S) { // place is full; don't add extra thread
4703 if ( place == last_place ) {
4704 place = first_place;
4705 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004706 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004707 place = 0;
4708 }
4709 else {
4710 place++;
4711 }
4712 gap_ct++;
4713 s_count = 0;
4714 }
4715
4716 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4717 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4718 team->t.t_id, f, th->th.th_new_place,
4719 th->th.th_first_place, th->th.th_last_place) );
4720 }
Jonas Hahnfeld170fcc82016-07-04 05:58:10 +00004721 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004722 }
4723 }
4724 break;
4725
4726 default:
4727 break;
4728 }
4729
4730 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4731}
4732
Alp Toker98758b02014-03-02 04:12:06 +00004733#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004734
4735/* allocate a new team data structure to use. take one off of the free pool if available */
4736kmp_team_t *
4737__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004738#if OMPT_SUPPORT
4739 ompt_parallel_id_t ompt_parallel_id,
4740#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004741#if OMP_40_ENABLED
4742 kmp_proc_bind_t new_proc_bind,
4743#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004744 kmp_internal_control_t *new_icvs,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004745 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00004746{
Jonathan Peyton45be4502015-08-11 21:36:41 +00004747 KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004748 int f;
4749 kmp_team_t *team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004750 int use_hot_team = ! root->r.r_active;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004751 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004752
4753 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
4754 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4755 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4756 KMP_MB();
4757
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004758#if KMP_NESTED_HOT_TEAMS
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004759 kmp_hot_team_ptr_t *hot_teams;
4760 if( master ) {
4761 team = master->th.th_team;
4762 level = team->t.t_active_level;
4763 if( master->th.th_teams_microtask ) { // in teams construct?
4764 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
4765 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
4766 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
4767 ++level; // do not increment if #teams==1 or for the outer fork of the teams; increment otherwise
4768 }
4769 }
4770 hot_teams = master->th.th_hot_teams;
4771 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4772 { // hot team has already been allocated for given level
4773 use_hot_team = 1;
4774 } else {
4775 use_hot_team = 0;
4776 }
4777 }
4778#endif
4779 // Optimization to use a "hot" team
4780 if( use_hot_team && new_nproc > 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004781 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004782#if KMP_NESTED_HOT_TEAMS
4783 team = hot_teams[level].hot_team;
4784#else
4785 team = root->r.r_hot_team;
4786#endif
4787#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00004788 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004789 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4790 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004791 }
4792#endif
4793
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004794 // Has the number of threads changed?
4795 /* Let's assume the most common case is that the number of threads is unchanged, and
4796 put that case first. */
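        // Three cases follow: the team size is unchanged (most common), the team
        // is shrinking (surplus workers are released or parked in reserve), or the
        // team is growing (reserved threads are reused and/or new workers allocated).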
4797 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4798 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004799 // This case can mean that omp_set_num_threads() was called and the hot team size
4800 // was already reduced, so we check the special flag
4801 if ( team->t.t_size_changed == -1 ) {
4802 team->t.t_size_changed = 1;
4803 } else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004804 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004805 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004806
4807 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004808 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004809 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4810 team->t.t_sched.chunk != new_sched.chunk)
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004811 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004812
4813 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4814
4815 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4816 0, team->t.t_threads[0], team ) );
4817 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4818
4819#if OMP_40_ENABLED
4820# if KMP_AFFINITY_SUPPORTED
Andrey Churbanovf0c4ba62015-08-17 10:04:38 +00004821 if ( ( team->t.t_size_changed == 0 )
4822 && ( team->t.t_proc_bind == new_proc_bind ) ) {
Jonathan Peyton7ba9bae2016-05-26 19:09:46 +00004823 if (new_proc_bind == proc_bind_spread) {
4824 __kmp_partition_places(team, 1); // add flag to update only master for spread
4825 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004826 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4827 team->t.t_id, new_proc_bind, team->t.t_first_place,
4828 team->t.t_last_place ) );
4829 }
4830 else {
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00004831 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004832 __kmp_partition_places( team );
4833 }
4834# else
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004835 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004836# endif /* KMP_AFFINITY_SUPPORTED */
4837#endif /* OMP_40_ENABLED */
4838 }
4839 else if( team->t.t_nproc > new_nproc ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004840 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4841
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004842 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004843#if KMP_NESTED_HOT_TEAMS
4844 if( __kmp_hot_teams_mode == 0 ) {
4845 // AC: saved number of threads should correspond to team's value in this mode,
4846 // can be bigger in mode 1, when hot team has some threads in reserve
4847 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4848 hot_teams[level].hot_team_nth = new_nproc;
4849#endif // KMP_NESTED_HOT_TEAMS
4850 /* release the extra threads we don't need any more */
4851 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4852 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
Jonathan Peyton54127982015-11-04 21:37:48 +00004853 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4854 // When decreasing team size, threads no longer in the team should unref task team.
4855 team->t.t_threads[f]->th.th_task_team = NULL;
4856 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004857 __kmp_free_thread( team->t.t_threads[ f ] );
4858 team->t.t_threads[ f ] = NULL;
4859 }
4860#if KMP_NESTED_HOT_TEAMS
4861 } // (__kmp_hot_teams_mode == 0)
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004862 else {
4863 // When keeping extra threads in team, switch threads to wait on own b_go flag
4864 for (f=new_nproc; f<team->t.t_nproc; ++f) {
4865 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4866 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4867 for (int b=0; b<bs_last_barrier; ++b) {
4868 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4869 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4870 }
4871 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4872 }
4873 }
4874 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004875#endif // KMP_NESTED_HOT_TEAMS
4876 team->t.t_nproc = new_nproc;
4877 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004878 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4879 team->t.t_sched.chunk != new_icvs->sched.chunk)
4880 team->t.t_sched = new_icvs->sched;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004881 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004882
Jim Cownie5e8470a2013-09-27 10:38:44 +00004883 /* update the remaining threads */
Jonathan Peyton54127982015-11-04 21:37:48 +00004884 for(f = 0; f < new_nproc; ++f) {
4885 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004886 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004887 // restore the current task state of the master thread: should be the implicit task
4888 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4889 0, team->t.t_threads[0], team ) );
4890
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004891 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004892
4893#ifdef KMP_DEBUG
4894 for ( f = 0; f < team->t.t_nproc; f++ ) {
4895 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4896 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4897 }
4898#endif
4899
4900#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00004901 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00004902# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004903 __kmp_partition_places( team );
4904# endif
4905#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004906 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004907 else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004908#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004909 kmp_affin_mask_t *old_mask;
4910 if ( KMP_AFFINITY_CAPABLE() ) {
4911 KMP_CPU_ALLOC(old_mask);
4912 }
4913#endif
4914
4915 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4916
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004917 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004918
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004919#if KMP_NESTED_HOT_TEAMS
4920 int avail_threads = hot_teams[level].hot_team_nth;
4921 if( new_nproc < avail_threads )
4922 avail_threads = new_nproc;
4923 kmp_info_t **other_threads = team->t.t_threads;
4924 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4925 // Adjust barrier data of reserved threads (if any) of the team
4926 // Other data will be set in __kmp_initialize_info() below.
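            // Their b_arrived counters must match the team's current barrier counts;
            // otherwise the rejoining threads would be out of step at the next
            // fork/join barrier.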
4927 int b;
4928 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4929 for ( b = 0; b < bs_last_barrier; ++ b ) {
4930 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4931 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004932#if USE_DEBUGGER
4933 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4934#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004935 }
4936 }
4937 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4938 // we have all needed threads in reserve, no need to allocate any
4939 // this is only possible in mode 1; we cannot have reserved threads in mode 0
4940 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4941 team->t.t_nproc = new_nproc; // just get reserved threads involved
4942 } else {
4943 // we may have some threads in reserve, but not enough
4944 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
4945 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4946#endif // KMP_NESTED_HOT_TEAMS
4947 if(team->t.t_max_nproc < new_nproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004948 /* reallocate larger arrays */
4949 __kmp_reallocate_team_arrays(team, new_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004950 __kmp_reinitialize_team( team, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004951 }
4952
Alp Toker98758b02014-03-02 04:12:06 +00004953#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004954 /* Temporarily set full mask for master thread before
4955 creation of workers. The reason is that workers inherit
4956 the affinity from the master, so if a lot of workers are
4957 created on a single core quickly, they don't get
4958 a chance to set their own affinity for a long time.
4959 */
4960 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4961#endif
4962
4963 /* allocate new threads for the hot team */
4964 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4965 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4966 KMP_DEBUG_ASSERT( new_worker );
4967 team->t.t_threads[ f ] = new_worker;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004968
Jonathan Peytond26e2132015-09-10 18:44:30 +00004969 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004970 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4971 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4972 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4973
4974 { // Initialize barrier data for new threads.
4975 int b;
4976 kmp_balign_t * balign = new_worker->th.th_bar;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004977 for( b = 0; b < bs_last_barrier; ++ b ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004978 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004979 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004980#if USE_DEBUGGER
4981 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4982#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004983 }
4984 }
4985 }
4986
Alp Toker98758b02014-03-02 04:12:06 +00004987#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004988 if ( KMP_AFFINITY_CAPABLE() ) {
4989 /* Restore initial master thread's affinity mask */
4990 __kmp_set_system_affinity( old_mask, TRUE );
4991 KMP_CPU_FREE(old_mask);
4992 }
4993#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004994#if KMP_NESTED_HOT_TEAMS
4995 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
4996#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004997 /* make sure everyone is synchronized */
Jonathan Peyton54127982015-11-04 21:37:48 +00004998 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004999 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005000
Jonathan Peytone03b62f2015-10-08 18:49:40 +00005001 /* reinitialize the threads */
5002 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
Jonathan Peyton54127982015-11-04 21:37:48 +00005003 for (f=0; f < team->t.t_nproc; ++f)
5004 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5005 if (level) { // set th_task_state for new threads in nested hot team
5006 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
Jonathan Peyton1be692e2015-11-30 20:14:05 +00005007 // th_task_state for the new threads. th_task_state for master thread will not be accurate until
Jonathan Peyton54127982015-11-04 21:37:48 +00005008 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
5009 for (f=old_nproc; f < team->t.t_nproc; ++f)
5010 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005011 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005012 else { // set th_task_state for new threads in non-nested hot team
5013 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
5014 for (f=old_nproc; f < team->t.t_nproc; ++f)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005015 team->t.t_threads[f]->th.th_task_state = old_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005016 }
5017
Jim Cownie5e8470a2013-09-27 10:38:44 +00005018#ifdef KMP_DEBUG
5019 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5020 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5021 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5022 }
5023#endif
5024
5025#if OMP_40_ENABLED
Jonathan Peyton6b560f02016-07-01 17:54:32 +00005026 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
Alp Toker98758b02014-03-02 04:12:06 +00005027# if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005028 __kmp_partition_places( team );
5029# endif
5030#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005031 } // Check changes in number of threads
Jim Cownie5e8470a2013-09-27 10:38:44 +00005032
5033#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005034 kmp_info_t *master = team->t.t_threads[0];
5035 if( master->th.th_teams_microtask ) {
5036 for( f = 1; f < new_nproc; ++f ) {
5037 // propagate teams construct specific info to workers
5038 kmp_info_t *thr = team->t.t_threads[f];
5039 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5040 thr->th.th_teams_level = master->th.th_teams_level;
5041 thr->th.th_teams_size = master->th.th_teams_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005042 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005043 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005044#endif /* OMP_40_ENABLED */
5045#if KMP_NESTED_HOT_TEAMS
5046 if( level ) {
Jonathan Peyton0dd75fd2015-10-20 19:21:04 +00005047 // Sync barrier state for nested hot teams, not needed for outermost hot team.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005048 for( f = 1; f < new_nproc; ++f ) {
5049 kmp_info_t *thr = team->t.t_threads[f];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005050 int b;
5051 kmp_balign_t * balign = thr->th.th_bar;
5052 for( b = 0; b < bs_last_barrier; ++ b ) {
5053 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5054 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005055#if USE_DEBUGGER
5056 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5057#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005058 }
5059 }
5060 }
5061#endif // KMP_NESTED_HOT_TEAMS
Jim Cownie5e8470a2013-09-27 10:38:44 +00005062
5063 /* reallocate space for arguments if necessary */
5064 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005065 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005066 //
5067 // The hot team re-uses the previous task team,
5068 // if untouched during the previous release->gather phase.
5069 //
5070
5071 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5072
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005073#if KMP_DEBUG
Jim Cownie5e8470a2013-09-27 10:38:44 +00005074 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005075 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5076 team->t.t_task_team[0], team->t.t_task_team[1] ));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005077 }
5078#endif
5079
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005080#if OMPT_SUPPORT
5081 __ompt_team_assign_id(team, ompt_parallel_id);
5082#endif
5083
Jim Cownie5e8470a2013-09-27 10:38:44 +00005084 KMP_MB();
5085
5086 return team;
5087 }
5088
5089 /* next, let's try to take one from the team pool */
5090 KMP_MB();
5091 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5092 {
5093 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5094 if ( team->t.t_max_nproc >= max_nproc ) {
5095 /* take this team from the team pool */
5096 __kmp_team_pool = team->t.t_next_pool;
5097
5098 /* setup the team for fresh use */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005099 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005100
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005101 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5102 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5103 team->t.t_task_team[0] = NULL;
5104 team->t.t_task_team[1] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005105
5106 /* reallocate space for arguments if necessary */
5107 __kmp_alloc_argv_entries( argc, team, TRUE );
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00005108 KMP_CHECK_UPDATE(team->t.t_argc, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005109
5110 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5111 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5112 { // Initialize barrier data.
5113 int b;
5114 for ( b = 0; b < bs_last_barrier; ++ b) {
5115 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005116#if USE_DEBUGGER
5117 team->t.t_bar[ b ].b_master_arrived = 0;
5118 team->t.t_bar[ b ].b_team_arrived = 0;
5119#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005120 }
5121 }
5122
5123#if OMP_40_ENABLED
5124 team->t.t_proc_bind = new_proc_bind;
5125#endif
5126
5127 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005128
5129#if OMPT_SUPPORT
5130 __ompt_team_assign_id(team, ompt_parallel_id);
5131#endif
5132
Jim Cownie5e8470a2013-09-27 10:38:44 +00005133 KMP_MB();
5134
5135 return team;
5136 }
5137
5138 /* reap team if it is too small, then loop back and check the next one */
5139        /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
5140 /* TODO: Use technique to find the right size hot-team, don't reap them */
5141 team = __kmp_reap_team( team );
5142 __kmp_team_pool = team;
5143 }
5144
5145 /* nothing available in the pool, no matter, make a new team! */
5146 KMP_MB();
5147 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5148
5149 /* and set it up */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005150 team->t.t_max_nproc = max_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005151 /* NOTE well, for some reason allocating one big buffer and dividing it
5152 * up seems to really hurt performance a lot on the P4, so, let's not use
5153 * this... */
5154 __kmp_allocate_team_arrays( team, max_nproc );
Jim Cownie181b4bb2013-12-23 17:28:57 +00005155
5156 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005157 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005158
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005159 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5160 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5161 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5162 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
Jim Cownie5e8470a2013-09-27 10:38:44 +00005163
5164 if ( __kmp_storage_map ) {
5165 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5166 }
5167
5168 /* allocate space for arguments */
5169 __kmp_alloc_argv_entries( argc, team, FALSE );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005170 team->t.t_argc = argc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005171
5172 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5173 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5174 { // Initialize barrier data.
5175 int b;
5176 for ( b = 0; b < bs_last_barrier; ++ b ) {
5177 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005178#if USE_DEBUGGER
5179 team->t.t_bar[ b ].b_master_arrived = 0;
5180 team->t.t_bar[ b ].b_team_arrived = 0;
5181#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005182 }
5183 }
5184
5185#if OMP_40_ENABLED
5186 team->t.t_proc_bind = new_proc_bind;
5187#endif
5188
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005189#if OMPT_SUPPORT
5190 __ompt_team_assign_id(team, ompt_parallel_id);
5191 team->t.ompt_serialized_team_info = NULL;
5192#endif
5193
Jim Cownie5e8470a2013-09-27 10:38:44 +00005194 KMP_MB();
5195
5196 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5197
5198 return team;
5199}
5200
5201/* TODO implement hot-teams at all levels */
5202/* TODO implement lazy thread release on demand (disband request) */
5203
5204/* free the team. return it to the team pool. release all the threads
5205 * associated with it */
5206void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005207__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005208{
5209 int f;
5210 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5211
5212 /* verify state */
5213 KMP_DEBUG_ASSERT( root );
5214 KMP_DEBUG_ASSERT( team );
5215 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5216 KMP_DEBUG_ASSERT( team->t.t_threads );
5217
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005218 int use_hot_team = team == root->r.r_hot_team;
5219#if KMP_NESTED_HOT_TEAMS
5220 int level;
5221 kmp_hot_team_ptr_t *hot_teams;
5222 if( master ) {
5223 level = team->t.t_active_level - 1;
5224 if( master->th.th_teams_microtask ) { // in teams construct?
5225 if( master->th.th_teams_size.nteams > 1 ) {
5226 ++level; // level was not increased in teams construct for team_of_masters
5227 }
5228 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5229 master->th.th_teams_level == team->t.t_level ) {
5230 ++level; // level was not increased in teams construct for team_of_workers before the parallel
5231 } // team->t.t_level will be increased inside parallel
5232 }
5233 hot_teams = master->th.th_hot_teams;
5234 if( level < __kmp_hot_teams_max_level ) {
5235 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5236 use_hot_team = 1;
5237 }
5238 }
5239#endif // KMP_NESTED_HOT_TEAMS
5240
Jim Cownie5e8470a2013-09-27 10:38:44 +00005241 /* team is done working */
5242 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005243 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jim Cownie5e8470a2013-09-27 10:38:44 +00005244 // Do not reset pointer to parent team to NULL for hot teams.
5245
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005246 /* if we are non-hot team, release our threads */
5247 if( ! use_hot_team ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005248 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005249 // Delete task teams
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005250 int tt_idx;
5251 for (tt_idx=0; tt_idx<2; ++tt_idx) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005252 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5253 if ( task_team != NULL ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00005254 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
5255 team->t.t_threads[f]->th.th_task_team = NULL;
5256 }
5257 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005258#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton54127982015-11-04 21:37:48 +00005259 __kmp_free_task_team( master, task_team );
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005260#endif
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005261 team->t.t_task_team[tt_idx] = NULL;
5262 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005263 }
5264 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005265
5266 // Reset pointer to parent team only for non-hot teams.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005267 team->t.t_parent = NULL;
Jonathan Peyton2b749b32016-05-12 21:54:30 +00005268 team->t.t_level = 0;
5269 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005270
Jim Cownie5e8470a2013-09-27 10:38:44 +00005271 /* free the worker threads */
5272 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5273 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5274 __kmp_free_thread( team->t.t_threads[ f ] );
5275 team->t.t_threads[ f ] = NULL;
5276 }
5277
Jim Cownie5e8470a2013-09-27 10:38:44 +00005278 /* put the team back in the team pool */
5279 /* TODO limit size of team pool, call reap_team if pool too large */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005280 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005281 __kmp_team_pool = (volatile kmp_team_t*) team;
5282 }
5283
5284 KMP_MB();
5285}
5286
5287
5288/* reap the team. destroy it, reclaim all its resources and free its memory */
5289kmp_team_t *
5290__kmp_reap_team( kmp_team_t *team )
5291{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005292 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005293
5294 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005295 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5296 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5297 KMP_DEBUG_ASSERT( team->t.t_threads );
5298 KMP_DEBUG_ASSERT( team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005299
5300 /* TODO clean the threads that are a part of this? */
5301
5302 /* free stuff */
5303
5304 __kmp_free_team_arrays( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005305 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5306 __kmp_free( (void*) team->t.t_argv );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005307 __kmp_free( team );
5308
5309 KMP_MB();
5310 return next_pool;
5311}
5312
5313//
5314// Free the thread. Don't reap it, just place it on the pool of available
5315// threads.
5316//
5317// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5318// binding for the affinity mechanism to be useful.
5319//
5320// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5321// However, we want to avoid a potential performance problem by always
5322// scanning through the list to find the correct point at which to insert
5323// the thread (potential N**2 behavior). To do this we keep track of the
5324// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5325// With single-level parallelism, threads will always be added to the tail
5326// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5327// parallelism, all bets are off and we may need to scan through the entire
5328// free list.
5329//
5330// This change also has a potentially large performance benefit, for some
5331// applications. Previously, as threads were freed from the hot team, they
5332// would be placed back on the free list in inverse order. If the hot team
5333// grew back to its original size, then the freed thread would be placed
5334// back on the hot team in reverse order. This could cause bad cache
5335// locality problems on programs where the size of the hot team regularly
5336// grew and shrunk.
5337//
5338// Now, for single-level parallelism, the OMP tid is always == gtid.
5339//
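// A minimal sketch of the insertion scheme described above (illustrative only:
// `pool_head' and `hint' stand in for __kmp_thread_pool and
// __kmp_thread_pool_insert_pt, and the reset of the hint when it already lies
// past the new insertion point is omitted):
//
//     kmp_info_t **scan = hint ? &hint->th.th_next_pool
//                              : (kmp_info_t **)&pool_head;
//     for ( ; *scan && (*scan)->th.th_info.ds.ds_gtid < gtid;
//             scan = &(*scan)->th.th_next_pool );
//     this_th->th.th_next_pool = *scan;   // splice in, keeping the list sorted
//     *scan = hint = this_th;             // remember the insertion point
//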
5340void
5341__kmp_free_thread( kmp_info_t *this_th )
5342{
5343 int gtid;
5344 kmp_info_t **scan;
5345
5346 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5347 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5348
5349 KMP_DEBUG_ASSERT( this_th );
5350
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005351    // When moving a thread to the pool, switch it to wait on its own b_go flag and an uninitialized (NULL) team.
5352 int b;
5353 kmp_balign_t *balign = this_th->th.th_bar;
5354 for (b=0; b<bs_last_barrier; ++b) {
5355 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5356 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5357 balign[b].bb.team = NULL;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005358 balign[b].bb.leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005359 }
Jonathan Peyton54127982015-11-04 21:37:48 +00005360 this_th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005361
Jim Cownie5e8470a2013-09-27 10:38:44 +00005362 /* put thread back on the free pool */
5363 TCW_PTR(this_th->th.th_team, NULL);
5364 TCW_PTR(this_th->th.th_root, NULL);
5365 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5366
5367 //
5368 // If the __kmp_thread_pool_insert_pt is already past the new insert
5369 // point, then we need to re-scan the entire list.
5370 //
5371 gtid = this_th->th.th_info.ds.ds_gtid;
5372 if ( __kmp_thread_pool_insert_pt != NULL ) {
5373 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5374 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5375 __kmp_thread_pool_insert_pt = NULL;
5376 }
5377 }
5378
5379 //
5380 // Scan down the list to find the place to insert the thread.
5381 // scan is the address of a link in the list, possibly the address of
5382 // __kmp_thread_pool itself.
5383 //
5384    // In the absence of nested parallelism, the for loop will have 0 iterations.
5385 //
5386 if ( __kmp_thread_pool_insert_pt != NULL ) {
5387 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5388 }
5389 else {
5390 scan = (kmp_info_t **)&__kmp_thread_pool;
5391 }
5392 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5393 scan = &( (*scan)->th.th_next_pool ) );
5394
5395 //
5396 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5397 // to its address.
5398 //
5399 TCW_PTR(this_th->th.th_next_pool, *scan);
5400 __kmp_thread_pool_insert_pt = *scan = this_th;
5401 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5402 || ( this_th->th.th_info.ds.ds_gtid
5403 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5404 TCW_4(this_th->th.th_in_pool, TRUE);
5405 __kmp_thread_pool_nth++;
5406
5407 TCW_4(__kmp_nth, __kmp_nth - 1);
5408
5409#ifdef KMP_ADJUST_BLOCKTIME
5410 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005411 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005412 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5413 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5414 if ( __kmp_nth <= __kmp_avail_proc ) {
5415 __kmp_zero_bt = FALSE;
5416 }
5417 }
5418#endif /* KMP_ADJUST_BLOCKTIME */
5419
5420 KMP_MB();
5421}
5422
Jim Cownie5e8470a2013-09-27 10:38:44 +00005423
Jim Cownie5e8470a2013-09-27 10:38:44 +00005424/* ------------------------------------------------------------------------ */
5425
5426void *
5427__kmp_launch_thread( kmp_info_t *this_thr )
5428{
5429 int gtid = this_thr->th.th_info.ds.ds_gtid;
5430/* void *stack_data;*/
5431 kmp_team_t *(*volatile pteam);
5432
5433 KMP_MB();
5434 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
5435
5436 if( __kmp_env_consistency_check ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005437 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005438 }
5439
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005440#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005441 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005442 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5443 this_thr->th.ompt_thread_info.wait_id = 0;
5444 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005445 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005446 __ompt_thread_begin(ompt_thread_worker, gtid);
5447 }
5448 }
5449#endif
5450
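    /* Worker lifecycle, in outline: until the library signals shutdown via
       __kmp_global.g.g_done, sleep at the fork barrier waiting for a team,
       invoke the team's microtask if one was assigned, then pass through the
       join barrier and go back to waiting. */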
Jim Cownie5e8470a2013-09-27 10:38:44 +00005451 /* This is the place where threads wait for work */
5452 while( ! TCR_4(__kmp_global.g.g_done) ) {
5453 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5454 KMP_MB();
5455
5456 /* wait for work to do */
5457 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5458
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005459#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005460 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005461 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5462 }
5463#endif
5464
Jim Cownie5e8470a2013-09-27 10:38:44 +00005465 /* No tid yet since not part of a team */
5466 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5467
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005468#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005469 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005470 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5471 }
5472#endif
5473
Jim Cownie5e8470a2013-09-27 10:38:44 +00005474 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5475
5476 /* have we been allocated? */
5477 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005478#if OMPT_SUPPORT
5479 ompt_task_info_t *task_info;
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005480 ompt_parallel_id_t my_parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005481 if (ompt_enabled) {
5482 task_info = __ompt_get_taskinfo(0);
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005483 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005484 }
5485#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005486 /* we were just woken up, so run our new task */
5487 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5488 int rc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005489 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5490 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005491
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005492 updateHWFPControl (*pteam);
5493
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005494#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005495 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005496 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton117a94f2015-06-29 17:28:57 +00005497 // Initialize OMPT task id for implicit task.
5498 int tid = __kmp_tid_from_gtid(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005499 task_info->task_id = __ompt_task_id_new(tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005500 }
5501#endif
5502
Jonathan Peyton45be4502015-08-11 21:36:41 +00005503 KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005504 {
Jonathan Peyton45be4502015-08-11 21:36:41 +00005505 KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00005506 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5507 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005508 rc = (*pteam)->t.t_invoke( gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005509 }
Jonathan Peyton45be4502015-08-11 21:36:41 +00005510 KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005511 KMP_ASSERT( rc );
5512
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005513#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005514 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005515 /* no frame set while outside task */
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005516 task_info->frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005517
5518 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5519 }
5520#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005521 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005522 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5523 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005524 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005525 /* join barrier after parallel region */
5526 __kmp_join_barrier( gtid );
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005527#if OMPT_SUPPORT && OMPT_TRACE
5528 if (ompt_enabled) {
5529 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
Jonas Hahnfelddbf627d2016-01-28 10:39:45 +00005530 // don't access *pteam here: it may have already been freed
5531 // by the master thread behind the barrier (possible race)
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005532 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5533 my_parallel_id, task_info->task_id);
5534 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00005535 task_info->frame.exit_runtime_frame = NULL;
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005536 task_info->task_id = 0;
5537 }
Jonathan Peyton61118492016-05-20 19:03:38 +00005538#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005539 }
5540 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005541 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005542
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005543#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005544 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005545 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5546 __ompt_thread_end(ompt_thread_worker, gtid);
5547 }
5548#endif
5549
Jonathan Peyton54127982015-11-04 21:37:48 +00005550 this_thr->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005551 /* run the destructors for the threadprivate data for this thread */
5552 __kmp_common_destroy_gtid( gtid );
5553
5554 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
5555 KMP_MB();
5556 return this_thr;
5557}
5558
5559/* ------------------------------------------------------------------------ */
5560/* ------------------------------------------------------------------------ */
5561
Jim Cownie5e8470a2013-09-27 10:38:44 +00005562void
5563__kmp_internal_end_dest( void *specific_gtid )
5564{
Jim Cownie181b4bb2013-12-23 17:28:57 +00005565 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005566 #pragma warning( push )
5567 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5568 #endif
5569 // Make sure no significant bits are lost
5570 int gtid = (kmp_intptr_t)specific_gtid - 1;
Jim Cownie181b4bb2013-12-23 17:28:57 +00005571 #if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00005572 #pragma warning( pop )
5573 #endif
5574
5575 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5576    /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
5577 * this is because 0 is reserved for the nothing-stored case */
5578
5579 /* josh: One reason for setting the gtid specific data even when it is being
5580 destroyed by pthread is to allow gtid lookup through thread specific data
5581 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5582 that gets executed in the call to __kmp_internal_end_thread, actually
5583 gets the gtid through the thread specific data. Setting it here seems
5584 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5585 to run smoothly.
5586 todo: get rid of this after we remove the dependence on
5587 __kmp_gtid_get_specific
5588 */
5589 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5590 __kmp_gtid_set_specific( gtid );
5591 #ifdef KMP_TDATA_GTID
5592 __kmp_gtid = gtid;
5593 #endif
5594 __kmp_internal_end_thread( gtid );
5595}
5596
Jonathan Peyton99016992015-05-26 17:32:53 +00005597#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005598
5599// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
Jonathan Peyton66338292015-06-01 02:37:28 +00005600// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
Jim Cownie5e8470a2013-09-27 10:38:44 +00005601// option in makefile.mk works fine.
5602
5603__attribute__(( destructor ))
5604void
5605__kmp_internal_end_dtor( void )
5606{
5607 __kmp_internal_end_atexit();
5608}
5609
5610void
5611__kmp_internal_end_fini( void )
5612{
5613 __kmp_internal_end_atexit();
5614}
5615
5616#endif
5617
5618/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
5619void
5620__kmp_internal_end_atexit( void )
5621{
5622 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5623 /* [Windows]
5624 josh: ideally, we want to completely shutdown the library in this atexit handler, but
5625 stat code that depends on thread specific data for gtid fails because that data becomes
5626 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
5627 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
5628 stat code and use __kmp_internal_end_library to cleanly shutdown the library.
5629
5630// TODO: Can some of this comment about GVS be removed?
5631 I suspect that the offending stat code is executed when the calling thread tries to
5632 clean up a dead root thread's data structures, resulting in GVS code trying to close
5633 the GVS structures for that thread, but since the stat code uses
5634 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
5635 cleaning up itself instead of another thread, it gets confused. This happens because
5636 allowing a thread to unregister and cleanup another thread is a recent modification for
5637 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
5638 thread may end up trying to unregister another thread only if thread death does not
5639 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
5640 specific data destructor function to detect thread death. For Windows dynamic, there
5641 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
5642 workaround is applicable only for Windows static stat library.
5643 */
5644 __kmp_internal_end_library( -1 );
5645 #if KMP_OS_WINDOWS
5646 __kmp_close_console();
5647 #endif
5648}
5649
5650static void
5651__kmp_reap_thread(
5652 kmp_info_t * thread,
5653 int is_root
5654) {
5655
Alp Toker8f2d3f02014-02-24 10:40:15 +00005656 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005657
5658 int gtid;
5659
5660 KMP_DEBUG_ASSERT( thread != NULL );
5661
5662 gtid = thread->th.th_info.ds.ds_gtid;
5663
5664 if ( ! is_root ) {
5665
5666 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5667 /* Assume the threads are at the fork barrier here */
5668 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5669 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005670 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5671 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005672 }; // if
5673
Jim Cownie5e8470a2013-09-27 10:38:44 +00005674 // Terminate OS thread.
5675 __kmp_reap_worker( thread );
5676
5677 //
5678 // The thread was killed asynchronously. If it was actively
Jonathan Peytonbf0cc3a2016-01-27 20:57:32 +00005679 // spinning in the thread pool, decrement the global count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005680 //
5681 // There is a small timing hole here - if the worker thread was
5682        // just waking up after sleeping in the pool, had reset its
5683 // th_active_in_pool flag but not decremented the global counter
5684 // __kmp_thread_pool_active_nth yet, then the global counter
5685 // might not get updated.
5686 //
5687 // Currently, this can only happen as the library is unloaded,
5688 // so there are no harmful side effects.
5689 //
5690 if ( thread->th.th_active_in_pool ) {
5691 thread->th.th_active_in_pool = FALSE;
5692 KMP_TEST_THEN_DEC32(
5693 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5694 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5695 }
5696
5697 // Decrement # of [worker] threads in the pool.
5698 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5699 --__kmp_thread_pool_nth;
5700 }; // if
5701
5702 // Free the fast memory for tasking
5703 #if USE_FAST_MEMORY
5704 __kmp_free_fast_memory( thread );
5705 #endif /* USE_FAST_MEMORY */
5706
5707 __kmp_suspend_uninitialize_thread( thread );
5708
5709 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5710 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5711
5712 -- __kmp_all_nth;
5713 // __kmp_nth was decremented when thread is added to the pool.
5714
5715#ifdef KMP_ADJUST_BLOCKTIME
5716 /* Adjust blocktime back to user setting or default if necessary */
Alp Toker8f2d3f02014-02-24 10:40:15 +00005717 /* Middle initialization might never have occurred */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005718 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5719 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5720 if ( __kmp_nth <= __kmp_avail_proc ) {
5721 __kmp_zero_bt = FALSE;
5722 }
5723 }
5724#endif /* KMP_ADJUST_BLOCKTIME */
5725
5726 /* free the memory being used */
5727 if( __kmp_env_consistency_check ) {
5728 if ( thread->th.th_cons ) {
5729 __kmp_free_cons_stack( thread->th.th_cons );
5730 thread->th.th_cons = NULL;
5731 }; // if
5732 }
5733
5734 if ( thread->th.th_pri_common != NULL ) {
5735 __kmp_free( thread->th.th_pri_common );
5736 thread->th.th_pri_common = NULL;
5737 }; // if
5738
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005739 if (thread->th.th_task_state_memo_stack != NULL) {
5740 __kmp_free(thread->th.th_task_state_memo_stack);
5741 thread->th.th_task_state_memo_stack = NULL;
5742 }
5743
Jim Cownie5e8470a2013-09-27 10:38:44 +00005744 #if KMP_USE_BGET
5745 if ( thread->th.th_local.bget_data != NULL ) {
5746 __kmp_finalize_bget( thread );
5747 }; // if
5748 #endif
5749
Alp Toker98758b02014-03-02 04:12:06 +00005750#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00005751 if ( thread->th.th_affin_mask != NULL ) {
5752 KMP_CPU_FREE( thread->th.th_affin_mask );
5753 thread->th.th_affin_mask = NULL;
5754 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005755#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005756
5757 __kmp_reap_team( thread->th.th_serial_team );
5758 thread->th.th_serial_team = NULL;
5759 __kmp_free( thread );
5760
5761 KMP_MB();
5762
5763} // __kmp_reap_thread
5764
5765static void
5766__kmp_internal_end(void)
5767{
5768 int i;
5769
5770 /* First, unregister the library */
5771 __kmp_unregister_library();
5772
5773 #if KMP_OS_WINDOWS
5774 /* In Win static library, we can't tell when a root actually dies, so we
5775 reclaim the data structures for any root threads that have died but not
5776 unregistered themselves, in order to shut down cleanly.
5777 In Win dynamic library we also can't tell when a thread dies.
5778 */
5779 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
5780 #endif
5781
5782 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5783 if( __kmp_root[i] )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005784 if( __kmp_root[i]->r.r_active )
Jim Cownie5e8470a2013-09-27 10:38:44 +00005785 break;
5786 KMP_MB(); /* Flush all pending memory write invalidates. */
5787 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5788
5789 if ( i < __kmp_threads_capacity ) {
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005790#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005791 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5792 KMP_MB(); /* Flush all pending memory write invalidates. */
5793
5794 //
5795 // Need to check that monitor was initialized before reaping it.
5796        // If we are called from __kmp_atfork_child (which sets
5797 // __kmp_init_parallel = 0), then __kmp_monitor will appear to
5798 // contain valid data, but it is only valid in the parent process,
5799 // not the child.
5800 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00005801 // New behavior (201008): instead of keying off of the flag
5802 // __kmp_init_parallel, the monitor thread creation is keyed off
5803 // of the new flag __kmp_init_monitor.
5804 //
5805 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5806 if ( TCR_4( __kmp_init_monitor ) ) {
5807 __kmp_reap_monitor( & __kmp_monitor );
5808 TCW_4( __kmp_init_monitor, 0 );
5809 }
5810 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5811 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005812#endif // KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005813 } else {
5814 /* TODO move this to cleanup code */
5815 #ifdef KMP_DEBUG
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005816 /* make sure that everything has properly ended */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005817 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5818 if( __kmp_root[i] ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005819// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
Jim Cownie77c2a632014-09-03 11:34:33 +00005820 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
Jim Cownie5e8470a2013-09-27 10:38:44 +00005821 }
5822 }
5823 #endif
5824
5825 KMP_MB();
5826
5827 // Reap the worker threads.
5828 // This is valid for now, but be careful if threads are reaped sooner.
5829        while ( __kmp_thread_pool != NULL ) {    // Loop thru all the threads in the pool.
5830 // Get the next thread from the pool.
5831 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5832 __kmp_thread_pool = thread->th.th_next_pool;
5833 // Reap it.
5834 thread->th.th_next_pool = NULL;
5835 thread->th.th_in_pool = FALSE;
5836 __kmp_reap_thread( thread, 0 );
5837 }; // while
5838 __kmp_thread_pool_insert_pt = NULL;
5839
5840 // Reap teams.
5841 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
5842 // Get the next team from the pool.
5843 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5844 __kmp_team_pool = team->t.t_next_pool;
5845 // Reap it.
5846 team->t.t_next_pool = NULL;
5847 __kmp_reap_team( team );
5848 }; // while
5849
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005850 __kmp_reap_task_teams( );
Jim Cownie5e8470a2013-09-27 10:38:44 +00005851
5852 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5853 // TBD: Add some checking...
5854 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5855 }
5856
5857 /* Make sure all threadprivate destructors get run by joining with all worker
5858 threads before resetting this flag */
5859 TCW_SYNC_4(__kmp_init_common, FALSE);
5860
5861 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
5862 KMP_MB();
5863
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005864#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00005865 //
5866 // See note above: One of the possible fixes for CQ138434 / CQ140126
5867 //
5868 // FIXME: push both code fragments down and CSE them?
5869 // push them into __kmp_cleanup() ?
5870 //
5871 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5872 if ( TCR_4( __kmp_init_monitor ) ) {
5873 __kmp_reap_monitor( & __kmp_monitor );
5874 TCW_4( __kmp_init_monitor, 0 );
5875 }
5876 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5877 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005878#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005879 } /* else !__kmp_global.t_active */
5880 TCW_4(__kmp_init_gtid, FALSE);
5881 KMP_MB(); /* Flush all pending memory write invalidates. */
5882
Jim Cownie5e8470a2013-09-27 10:38:44 +00005883 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005884#if OMPT_SUPPORT
5885 ompt_fini();
5886#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005887}
5888
5889void
5890__kmp_internal_end_library( int gtid_req )
5891{
Jim Cownie5e8470a2013-09-27 10:38:44 +00005892 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5893 /* this shouldn't be a race condition because __kmp_internal_end() is the
5894 * only place to clear __kmp_serial_init */
5895 /* we'll check this later too, after we get the lock */
5896    // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
5897 // because the next check will work in any case.
5898 if( __kmp_global.g.g_abort ) {
5899 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
5900 /* TODO abort? */
5901 return;
5902 }
5903 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5904 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
5905 return;
5906 }
5907
5908
5909 KMP_MB(); /* Flush all pending memory write invalidates. */
5910
5911 /* find out who we are and what we should do */
5912 {
5913 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
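        /* gtid here is either a real registered gtid or one of the sentinels:
           KMP_GTID_SHUTDOWN (runtime already shut down), KMP_GTID_MONITOR
           (monitor thread / gtid not registered), or KMP_GTID_DNE (gtid not
           registered, or shutdown already in progress). */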
5914 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5915 if( gtid == KMP_GTID_SHUTDOWN ) {
5916 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5917 return;
5918 } else if( gtid == KMP_GTID_MONITOR ) {
5919 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5920 return;
5921 } else if( gtid == KMP_GTID_DNE ) {
5922 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5923 /* we don't know who we are, but we may still shutdown the library */
5924 } else if( KMP_UBER_GTID( gtid )) {
5925 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005926 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005927 __kmp_global.g.g_abort = -1;
5928 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5929 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5930 return;
5931 } else {
5932 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5933 __kmp_unregister_root_current_thread( gtid );
5934 }
5935 } else {
5936 /* worker threads may call this function through the atexit handler, if they call exit() */
5937 /* For now, skip the usual subsequent processing and just dump the debug buffer.
5938 TODO: do a thorough shutdown instead
5939 */
5940 #ifdef DUMP_DEBUG_ON_EXIT
5941 if ( __kmp_debug_buf )
5942 __kmp_dump_debug_buffer( );
5943 #endif
5944 return;
5945 }
5946 }
5947 /* synchronize the termination process */
5948 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5949
5950 /* have we already finished */
5951 if( __kmp_global.g.g_abort ) {
5952 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
5953 /* TODO abort? */
5954 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5955 return;
5956 }
5957 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5958 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5959 return;
5960 }
5961
5962    /* We need this lock to enforce mutual exclusion between this reading of
5963 __kmp_threads_capacity and the writing by __kmp_register_root.
5964 Alternatively, we can use a counter of roots that is
5965 atomically updated by __kmp_get_global_thread_id_reg,
5966 __kmp_do_serial_initialize and __kmp_internal_end_*.
5967 */
5968 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5969
5970 /* now we can safely conduct the actual termination */
5971 __kmp_internal_end();
5972
5973 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5974 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5975
5976 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
5977
5978 #ifdef DUMP_DEBUG_ON_EXIT
5979 if ( __kmp_debug_buf )
5980 __kmp_dump_debug_buffer();
5981 #endif
5982
5983 #if KMP_OS_WINDOWS
5984 __kmp_close_console();
5985 #endif
5986
5987 __kmp_fini_allocator();
5988
5989} // __kmp_internal_end_library
5990
5991void
5992__kmp_internal_end_thread( int gtid_req )
5993{
5994 int i;
5995
5996 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5997 /* this shouldn't be a race condition because __kmp_internal_end() is the
5998 * only place to clear __kmp_serial_init */
5999 /* we'll check this later too, after we get the lock */
6000 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
6001 // because the next check will work in any case.
6002 if( __kmp_global.g.g_abort ) {
6003 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
6004 /* TODO abort? */
6005 return;
6006 }
6007 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6008 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
6009 return;
6010 }
6011
6012 KMP_MB(); /* Flush all pending memory write invalidates. */
6013
6014 /* find out who we are and what we should do */
6015 {
6016 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6017 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6018 if( gtid == KMP_GTID_SHUTDOWN ) {
6019 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6020 return;
6021 } else if( gtid == KMP_GTID_MONITOR ) {
6022 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6023 return;
6024 } else if( gtid == KMP_GTID_DNE ) {
6025 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6026 return;
6027 /* we don't know who we are */
6028 } else if( KMP_UBER_GTID( gtid )) {
6029 /* unregister ourselves as an uber thread. gtid is no longer valid */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006030 if( __kmp_root[gtid]->r.r_active ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006031 __kmp_global.g.g_abort = -1;
6032 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6033 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6034 return;
6035 } else {
6036 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6037 __kmp_unregister_root_current_thread( gtid );
6038 }
6039 } else {
6040 /* just a worker thread, let's leave */
6041 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6042
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006043 if ( gtid >= 0 ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00006044 __kmp_threads[gtid]->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006045 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006046
6047 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6048 return;
6049 }
6050 }
Jonathan Peyton99016992015-05-26 17:32:53 +00006051 #if defined KMP_DYNAMIC_LIB
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006052    // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber thread,
6053    // because it is better to shut down later, in the library destructor.
6054    // The reason for this change is a performance problem when a non-OpenMP thread
Jim Cownie5e8470a2013-09-27 10:38:44 +00006055    // in a loop forks and joins many OpenMP threads. We can save a lot of time by
6056    // keeping worker threads alive until program shutdown.
6057 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
6058 // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
Jim Cownie77c2a632014-09-03 11:34:33 +00006059 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006060 return;
6061 #endif
6062 /* synchronize the termination process */
6063 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6064
6065 /* have we already finished */
6066 if( __kmp_global.g.g_abort ) {
6067 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
6068 /* TODO abort? */
6069 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6070 return;
6071 }
6072 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6073 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6074 return;
6075 }
6076
6077    /* We need this lock to enforce mutual exclusion between this reading of
6078 __kmp_threads_capacity and the writing by __kmp_register_root.
6079 Alternatively, we can use a counter of roots that is
6080 atomically updated by __kmp_get_global_thread_id_reg,
6081 __kmp_do_serial_initialize and __kmp_internal_end_*.
6082 */
6083
6084 /* should we finish the run-time? are all siblings done? */
6085 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6086
6087 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6088 if ( KMP_UBER_GTID( i ) ) {
6089 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6090 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6091 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6092 return;
6093 };
6094 }
6095
6096 /* now we can safely conduct the actual termination */
6097
6098 __kmp_internal_end();
6099
6100 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6101 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6102
Jim Cownie77c2a632014-09-03 11:34:33 +00006103 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006104
6105 #ifdef DUMP_DEBUG_ON_EXIT
6106 if ( __kmp_debug_buf )
6107 __kmp_dump_debug_buffer();
6108 #endif
6109} // __kmp_internal_end_thread
6110
6111// -------------------------------------------------------------------------------------------------
6112// Library registration stuff.
6113
6114static long __kmp_registration_flag = 0;
6115 // Random value used to indicate library initialization.
6116static char * __kmp_registration_str = NULL;
6117 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6118
6119
6120static inline
6121char *
6122__kmp_reg_status_name() {
6123 /*
6124        On RHEL 3u5, if linked statically, getpid() returns different values in each thread.
6125        If registration and unregistration happen in different threads (omp_misc_other_root_exit.cpp test case),
6126        the name of the registered_lib_env env var cannot be found, because the name will contain a different pid.
6127 */
6128 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6129} // __kmp_reg_status_name
6130
6131
6132void
6133__kmp_register_library_startup(
6134 void
6135) {
6136
6137 char * name = __kmp_reg_status_name(); // Name of the environment variable.
6138 int done = 0;
6139 union {
6140 double dtime;
6141 long ltime;
6142 } time;
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006143 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie5e8470a2013-09-27 10:38:44 +00006144 __kmp_initialize_system_tick();
6145 #endif
6146 __kmp_read_system_time( & time.dtime );
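    // The registration flag mixes a recognizable 0xCAFE prefix with the low 16 bits
    // of the current time, so the value is very likely to differ between library loads.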
6147 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6148 __kmp_registration_str =
6149 __kmp_str_format(
6150 "%p-%lx-%s",
6151 & __kmp_registration_flag,
6152 __kmp_registration_flag,
6153 KMP_LIBRARY_FILE
6154 );
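    // For illustration only, the resulting record has the form
    //   __KMP_REGISTERED_LIB_<pid> = "<flag address>-<flag value>-<library file>"
    // e.g. something like "0x7f3e5c0011c8-cafe1a2b-libomp.so" (hypothetical values).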
6155
6156 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6157
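    // Registration protocol, in outline: publish our record in the environment
    // unless another copy of the runtime already did. On a collision, decide whether
    // that copy is still alive (its flag address is mapped and still holds the
    // recorded value); an unparsable record is conservatively treated as alive.
    // A live neighbor is a fatal duplicate-library error unless KMP_DUPLICATE_LIB_OK
    // is set; a dead neighbor's record is removed and registration is retried.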
6158 while ( ! done ) {
6159
6160 char * value = NULL; // Actual value of the environment variable.
6161
6162        // Set the environment variable, but do not overwrite it if it already exists.
6163 __kmp_env_set( name, __kmp_registration_str, 0 );
6164        // Check that the variable was written.
6165 value = __kmp_env_get( name );
6166 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6167
6168 done = 1; // Ok, environment variable set successfully, exit the loop.
6169
6170 } else {
6171
6172 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6173            // Check whether it is alive or dead.
6174 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6175 char * tail = value;
6176 char * flag_addr_str = NULL;
6177 char * flag_val_str = NULL;
6178 char const * file_name = NULL;
6179 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6180 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6181 file_name = tail;
6182 if ( tail != NULL ) {
6183 long * flag_addr = 0;
6184 long flag_val = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00006185 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6186 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006187 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6188 // First, check whether environment-encoded address is mapped into addr space.
6189 // If so, dereference it to see if it still has the right value.
6190
6191 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6192 neighbor = 1;
6193 } else {
6194 // If not, then we know the other copy of the library is no longer running.
6195 neighbor = 2;
6196 }; // if
6197 }; // if
6198 }; // if
6199 switch ( neighbor ) {
6200 case 0 : // Cannot parse environment variable -- neighbor status unknown.
6201                    // Assume it is the incompatible format of a future version of the library.
6202 // Assume the other library is alive.
6203 // WARN( ... ); // TODO: Issue a warning.
6204 file_name = "unknown library";
6205                    // Attention! Falling through to the next case. That's intentional.
6206 case 1 : { // Neighbor is alive.
6207 // Check it is allowed.
6208 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6209 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6210 // That's not allowed. Issue fatal error.
6211 __kmp_msg(
6212 kmp_ms_fatal,
6213 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6214 KMP_HNT( DuplicateLibrary ),
6215 __kmp_msg_null
6216 );
6217 }; // if
6218 KMP_INTERNAL_FREE( duplicate_ok );
6219 __kmp_duplicate_library_ok = 1;
6220 done = 1; // Exit the loop.
6221 } break;
6222 case 2 : { // Neighbor is dead.
6223 // Clear the variable and try to register library again.
6224 __kmp_env_unset( name );
6225 } break;
6226 default : {
6227 KMP_DEBUG_ASSERT( 0 );
6228 } break;
6229 }; // switch
6230
6231 }; // if
6232 KMP_INTERNAL_FREE( (void *) value );
6233
6234 }; // while
6235 KMP_INTERNAL_FREE( (void *) name );
6236
6237} // func __kmp_register_library_startup
6238
6239
6240void
6241__kmp_unregister_library( void ) {
6242
6243 char * name = __kmp_reg_status_name();
6244 char * value = __kmp_env_get( name );
6245
6246 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6247 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6248 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6249 // Ok, this is our variable. Delete it.
6250 __kmp_env_unset( name );
6251 }; // if
6252
6253 KMP_INTERNAL_FREE( __kmp_registration_str );
6254 KMP_INTERNAL_FREE( value );
6255 KMP_INTERNAL_FREE( name );
6256
6257 __kmp_registration_flag = 0;
6258 __kmp_registration_str = NULL;
6259
6260} // __kmp_unregister_library
6261
6262
6263// End of Library registration stuff.
6264// -------------------------------------------------------------------------------------------------
6265
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006266#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6267
6268static void __kmp_check_mic_type()
6269{
6270 kmp_cpuid_t cpuid_state = {0};
6271 kmp_cpuid_t * cs_p = &cpuid_state;
Jonathan Peyton7be075332015-06-22 15:53:50 +00006272 __kmp_x86_cpuid(1, 0, cs_p);
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006273 // We don't support mic1 at the moment
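    // Note: the masks below select the family/model fields of the CPUID leaf 1 EAX
    // signature; family 0x0B, model 1 is presumably Knights Corner (mic2) and
    // family 6, model 0x57 (with the extended-model bits) Knights Landing (mic3).
    // Treat this decoding note as informational rather than authoritative.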
6274 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6275 __kmp_mic_type = mic2;
6276 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6277 __kmp_mic_type = mic3;
6278 } else {
6279 __kmp_mic_type = non_mic;
6280 }
6281}
6282
6283#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
6284
Jim Cownie5e8470a2013-09-27 10:38:44 +00006285static void
6286__kmp_do_serial_initialize( void )
6287{
6288 int i, gtid;
6289 int size;
6290
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006291 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006292
6293 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6294 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6295 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6296 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6297 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6298
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006299#if OMPT_SUPPORT
6300 ompt_pre_init();
6301#endif
6302
Jim Cownie5e8470a2013-09-27 10:38:44 +00006303 __kmp_validate_locks();
6304
6305 /* Initialize internal memory allocator */
6306 __kmp_init_allocator();
6307
6308 /* Register the library startup via an environment variable
6309 and check to see whether another copy of the library is already
6310 registered. */
6311
6312 __kmp_register_library_startup( );
6313
6314 /* TODO reinitialization of library */
6315 if( TCR_4(__kmp_global.g.g_done) ) {
6316 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
6317 }
6318
6319 __kmp_global.g.g_abort = 0;
6320 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6321
6322 /* initialize the locks */
6323#if KMP_USE_ADAPTIVE_LOCKS
6324#if KMP_DEBUG_ADAPTIVE_LOCKS
6325 __kmp_init_speculative_stats();
6326#endif
6327#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006328#if KMP_STATS_ENABLED
6329 __kmp_init_tas_lock( & __kmp_stats_lock );
6330#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006331 __kmp_init_lock( & __kmp_global_lock );
6332 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6333 __kmp_init_lock( & __kmp_debug_lock );
6334 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6335 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6336 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6337 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6338 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6339 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6340 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6341 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6342 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6343 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6344 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6345 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6346 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6347 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6348 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006349#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006350 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006351#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006352 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6353
6354 /* conduct initialization and initial setup of configuration */
6355
6356 __kmp_runtime_initialize();
6357
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006358#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6359 __kmp_check_mic_type();
6360#endif
6361
Jim Cownie5e8470a2013-09-27 10:38:44 +00006362 // Some global variable initialization moved here from kmp_env_initialize()
6363#ifdef KMP_DEBUG
6364 kmp_diag = 0;
6365#endif
6366 __kmp_abort_delay = 0;
6367
6368 // From __kmp_init_dflt_team_nth()
6369 /* assume the entire machine will be used */
6370 __kmp_dflt_team_nth_ub = __kmp_xproc;
6371 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6372 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6373 }
6374 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6375 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6376 }
6377 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006378
6379 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
6380 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006381#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00006382 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6383 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006384#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006385 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6386 __kmp_library = library_throughput;
6387 // From KMP_SCHEDULE initialization
6388 __kmp_static = kmp_sch_static_balanced;
            6389    // AC: do not use analytical here, because it is non-monotonic
6390 //__kmp_guided = kmp_sch_guided_iterative_chunked;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006391    //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat assignment
Jim Cownie5e8470a2013-09-27 10:38:44 +00006392    // Barrier initialization. Moved here from the barrier branch bit control and barrier method control
            6393    // parts of __kmp_env_initialize().
6394 #if KMP_FAST_REDUCTION_BARRIER
6395 #define kmp_reduction_barrier_gather_bb ((int)1)
6396 #define kmp_reduction_barrier_release_bb ((int)1)
6397 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6398 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6399 #endif // KMP_FAST_REDUCTION_BARRIER
6400 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6401 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6402 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6403 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6404 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6405 #if KMP_FAST_REDUCTION_BARRIER
6406 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
6407 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6408 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6409 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6410 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6411 }
6412 #endif // KMP_FAST_REDUCTION_BARRIER
6413 }
6414 #if KMP_FAST_REDUCTION_BARRIER
6415 #undef kmp_reduction_barrier_release_pat
6416 #undef kmp_reduction_barrier_gather_pat
6417 #undef kmp_reduction_barrier_release_bb
6418 #undef kmp_reduction_barrier_gather_bb
6419 #endif // KMP_FAST_REDUCTION_BARRIER
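    /*
     * Worked example (illustrative): the barrier code treats the branch bits as a
     * log2 fan-out, so a gather value of 2 means each parent waits for up to
     * 1 << 2 = 4 children per level, and a 256-thread team is gathered in four
     * levels (4 * 4 * 4 * 4 = 256). The reduction-barrier overrides above simply
     * pin that fan-out to 1 << 1 = 2 with the hyper pattern.
     */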
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006420#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
Jonathan Peytonf6498622016-01-11 20:37:39 +00006421 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006422        // AC: plain=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00006423 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006424 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
6425 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6426 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6427 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006428#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peytonf6498622016-01-11 20:37:39 +00006429 if (__kmp_mic_type == mic2) { // KNC
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006430 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6431 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6432 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006433#endif
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006434#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006435
6436 // From KMP_CHECKS initialization
6437#ifdef KMP_DEBUG
6438 __kmp_env_checks = TRUE; /* development versions have the extra checks */
6439#else
6440 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
6441#endif
6442
6443 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6444 __kmp_foreign_tp = TRUE;
6445
6446 __kmp_global.g.g_dynamic = FALSE;
6447 __kmp_global.g.g_dynamic_mode = dynamic_default;
6448
6449 __kmp_env_initialize( NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006450
Jim Cownie5e8470a2013-09-27 10:38:44 +00006451 // Print all messages in message catalog for testing purposes.
6452 #ifdef KMP_DEBUG
6453 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6454 if ( __kmp_str_match_true( val ) ) {
6455 kmp_str_buf_t buffer;
6456 __kmp_str_buf_init( & buffer );
Jim Cownie181b4bb2013-12-23 17:28:57 +00006457 __kmp_i18n_dump_catalog( & buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006458 __kmp_printf( "%s", buffer.str );
6459 __kmp_str_buf_free( & buffer );
6460 }; // if
6461 __kmp_env_free( & val );
6462 #endif
6463
Jim Cownie181b4bb2013-12-23 17:28:57 +00006464 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006465 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6466 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6467
Jim Cownie5e8470a2013-09-27 10:38:44 +00006468 // If the library is shut down properly, both pools must be NULL. Just in case, set them
6469 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
6470 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6471 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6472 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6473 __kmp_thread_pool = NULL;
6474 __kmp_thread_pool_insert_pt = NULL;
6475 __kmp_team_pool = NULL;
6476
6477 /* Allocate all of the variable sized records */
6478 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
6479 /* Since allocation is cache-aligned, just add extra padding at the end */
6480 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6481 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6482 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6483
6484 /* init thread counts */
6485 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
6486 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
6487 __kmp_all_nth = 0;
6488 __kmp_nth = 0;
6489
6490 /* setup the uber master thread and hierarchy */
6491 gtid = __kmp_register_root( TRUE );
6492 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
6493 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6494 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6495
6496 KMP_MB(); /* Flush all pending memory write invalidates. */
6497
6498 __kmp_common_initialize();
6499
6500 #if KMP_OS_UNIX
6501 /* invoke the child fork handler */
6502 __kmp_register_atfork();
6503 #endif
6504
Jonathan Peyton99016992015-05-26 17:32:53 +00006505 #if ! defined KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00006506 {
            6507        /* Invoke the exit handler when the program finishes, only for the static library.
            6508           For a dynamic library, we already have _fini and DllMain.
6509 */
6510 int rc = atexit( __kmp_internal_end_atexit );
6511 if ( rc != 0 ) {
6512 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6513 }; // if
6514 }
6515 #endif
6516
6517 #if KMP_HANDLE_SIGNALS
6518 #if KMP_OS_UNIX
6519 /* NOTE: make sure that this is called before the user installs
6520 * their own signal handlers so that the user handlers
            6521             * are called first. This way they can return false,
6522 * not call our handler, avoid terminating the library,
6523 * and continue execution where they left off. */
6524 __kmp_install_signals( FALSE );
6525 #endif /* KMP_OS_UNIX */
6526 #if KMP_OS_WINDOWS
6527 __kmp_install_signals( TRUE );
6528 #endif /* KMP_OS_WINDOWS */
6529 #endif
6530
6531 /* we have finished the serial initialization */
6532 __kmp_init_counter ++;
6533
6534 __kmp_init_serial = TRUE;
6535
6536 if (__kmp_settings) {
6537 __kmp_env_print();
6538 }
6539
6540#if OMP_40_ENABLED
6541 if (__kmp_display_env || __kmp_display_env_verbose) {
6542 __kmp_env_print_2();
6543 }
6544#endif // OMP_40_ENABLED
6545
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006546#if OMPT_SUPPORT
6547 ompt_post_init();
6548#endif
6549
Jim Cownie5e8470a2013-09-27 10:38:44 +00006550 KMP_MB();
6551
6552 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
6553}
6554
6555void
6556__kmp_serial_initialize( void )
6557{
6558 if ( __kmp_init_serial ) {
6559 return;
6560 }
6561 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6562 if ( __kmp_init_serial ) {
6563 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6564 return;
6565 }
6566 __kmp_do_serial_initialize();
6567 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6568}
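
/*
 * The wrapper above (and __kmp_middle_initialize / __kmp_parallel_initialize below)
 * follows the usual check / lock / re-check shape, sketched generically here:
 *
 *     if ( initialized ) return;         // fast path, no lock taken
 *     acquire( &init_lock );
 *     if ( ! initialized ) {             // re-check: another thread may have finished
 *         do_initialize();               //   the job while we waited for the lock
 *         initialized = TRUE;
 *     }
 *     release( &init_lock );
 *
 * The second check is what makes concurrent first calls safe; acquire/release stand
 * in for the bootstrap lock primitives used here.
 */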
6569
6570static void
6571__kmp_do_middle_initialize( void )
6572{
6573 int i, j;
6574 int prev_dflt_team_nth;
6575
6576 if( !__kmp_init_serial ) {
6577 __kmp_do_serial_initialize();
6578 }
6579
6580 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
6581
6582 //
6583 // Save the previous value for the __kmp_dflt_team_nth so that
6584 // we can avoid some reinitialization if it hasn't changed.
6585 //
6586 prev_dflt_team_nth = __kmp_dflt_team_nth;
6587
Alp Toker98758b02014-03-02 04:12:06 +00006588#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006589 //
6590 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6591 // number of cores on the machine.
6592 //
6593 __kmp_affinity_initialize();
6594
6595 //
6596 // Run through the __kmp_threads array and set the affinity mask
6597 // for each root thread that is currently registered with the RTL.
6598 //
6599 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6600 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6601 __kmp_affinity_set_init_mask( i, TRUE );
6602 }
6603 }
Alp Toker98758b02014-03-02 04:12:06 +00006604#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006605
6606 KMP_ASSERT( __kmp_xproc > 0 );
6607 if ( __kmp_avail_proc == 0 ) {
6608 __kmp_avail_proc = __kmp_xproc;
6609 }
6610
            6611    // If there were empty places in the num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
6612 j = 0;
Jonathan Peyton9e6eb482015-05-26 16:38:26 +00006613 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006614 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6615 j++;
6616 }
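    /*
     * Example (hypothetical 16-processor machine): with OMP_NUM_THREADS=",,2,3",
     * parsed earlier from the environment, only the leading empty entries are
     * touched by this loop, so the per-level values become 16,16,2,3 and both
     * __kmp_dflt_team_nth and __kmp_dflt_team_nth_ub become 16 (__kmp_avail_proc).
     */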
6617
6618 if ( __kmp_dflt_team_nth == 0 ) {
6619#ifdef KMP_DFLT_NTH_CORES
6620 //
6621 // Default #threads = #cores
6622 //
6623 __kmp_dflt_team_nth = __kmp_ncores;
6624 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6625 __kmp_dflt_team_nth ) );
6626#else
6627 //
6628 // Default #threads = #available OS procs
6629 //
6630 __kmp_dflt_team_nth = __kmp_avail_proc;
6631 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6632 __kmp_dflt_team_nth ) );
6633#endif /* KMP_DFLT_NTH_CORES */
6634 }
6635
6636 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6637 __kmp_dflt_team_nth = KMP_MIN_NTH;
6638 }
6639 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6640 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6641 }
6642
6643 //
6644 // There's no harm in continuing if the following check fails,
6645 // but it indicates an error in the previous logic.
6646 //
6647 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6648
6649 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6650 //
6651 // Run through the __kmp_threads array and set the num threads icv
6652 // for each root thread that is currently registered with the RTL
6653 // (which has not already explicitly set its nthreads-var with a
6654 // call to omp_set_num_threads()).
6655 //
6656 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6657 kmp_info_t *thread = __kmp_threads[ i ];
6658 if ( thread == NULL ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006659 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006660
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006661 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006662 }
6663 }
6664 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6665 __kmp_dflt_team_nth) );
6666
6667#ifdef KMP_ADJUST_BLOCKTIME
6668 /* Adjust blocktime to zero if necessary */
6669 /* now that __kmp_avail_proc is set */
6670 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6671 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6672 if ( __kmp_nth > __kmp_avail_proc ) {
6673 __kmp_zero_bt = TRUE;
6674 }
6675 }
6676#endif /* KMP_ADJUST_BLOCKTIME */
6677
6678 /* we have finished middle initialization */
6679 TCW_SYNC_4(__kmp_init_middle, TRUE);
6680
6681 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
6682}
6683
6684void
6685__kmp_middle_initialize( void )
6686{
6687 if ( __kmp_init_middle ) {
6688 return;
6689 }
6690 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6691 if ( __kmp_init_middle ) {
6692 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6693 return;
6694 }
6695 __kmp_do_middle_initialize();
6696 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6697}
6698
6699void
6700__kmp_parallel_initialize( void )
6701{
6702 int gtid = __kmp_entry_gtid(); // this might be a new root
6703
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006704    /* synchronize parallel initialization (for siblings) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006705 if( TCR_4(__kmp_init_parallel) ) return;
6706 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6707 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6708
6709 /* TODO reinitialization after we have already shut down */
6710 if( TCR_4(__kmp_global.g.g_done) ) {
6711 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6712 __kmp_infinite_loop();
6713 }
6714
6715 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
6716 would cause a deadlock. So we call __kmp_do_serial_initialize directly.
6717 */
6718 if( !__kmp_init_middle ) {
6719 __kmp_do_middle_initialize();
6720 }
6721
6722 /* begin initialization */
6723 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
6724 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6725
6726#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6727 //
6728 // Save the FP control regs.
6729 // Worker threads will set theirs to these values at thread startup.
6730 //
6731 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6732 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6733 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6734#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6735
6736#if KMP_OS_UNIX
6737# if KMP_HANDLE_SIGNALS
6738 /* must be after __kmp_serial_initialize */
6739 __kmp_install_signals( TRUE );
6740# endif
6741#endif
6742
6743 __kmp_suspend_initialize();
6744
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006745#if defined(USE_LOAD_BALANCE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00006746 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6747 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6748 }
6749#else
6750 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6751 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6752 }
6753#endif
6754
6755 if ( __kmp_version ) {
6756 __kmp_print_version_2();
6757 }
6758
Jim Cownie5e8470a2013-09-27 10:38:44 +00006759 /* we have finished parallel initialization */
6760 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6761
6762 KMP_MB();
6763 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
6764
6765 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6766}
6767
6768
6769/* ------------------------------------------------------------------------ */
6770
6771void
6772__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6773 kmp_team_t *team )
6774{
6775 kmp_disp_t *dispatch;
6776
6777 KMP_MB();
6778
6779 /* none of the threads have encountered any constructs, yet. */
6780 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006781#if KMP_CACHE_MANAGE
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006782 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006783#endif /* KMP_CACHE_MANAGE */
6784 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6785 KMP_DEBUG_ASSERT( dispatch );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006786 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6787 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006788
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006789 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006790#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00006791 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
6792#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006793 if( __kmp_env_consistency_check )
6794 __kmp_push_parallel( gtid, team->t.t_ident );
6795
6796 KMP_MB(); /* Flush all pending memory write invalidates. */
6797}
6798
6799void
6800__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6801 kmp_team_t *team )
6802{
6803 if( __kmp_env_consistency_check )
6804 __kmp_pop_parallel( gtid, team->t.t_ident );
6805}
6806
6807int
6808__kmp_invoke_task_func( int gtid )
6809{
6810 int rc;
6811 int tid = __kmp_tid_from_gtid( gtid );
6812 kmp_info_t *this_thr = __kmp_threads[ gtid ];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006813 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006814
6815 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6816#if USE_ITT_BUILD
6817 if ( __itt_stack_caller_create_ptr ) {
6818 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
6819 }
6820#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006821#if INCLUDE_SSC_MARKS
6822 SSC_MARK_INVOKING();
6823#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006824
6825#if OMPT_SUPPORT
6826 void *dummy;
6827 void **exit_runtime_p;
6828 ompt_task_id_t my_task_id;
6829 ompt_parallel_id_t my_parallel_id;
6830
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006831 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006832 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6833 ompt_task_info.frame.exit_runtime_frame);
6834 } else {
6835 exit_runtime_p = &dummy;
6836 }
6837
6838#if OMPT_TRACE
6839 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6840 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00006841 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006842 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6843 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6844 my_parallel_id, my_task_id);
6845 }
6846#endif
6847#endif
6848
Jonathan Peyton45be4502015-08-11 21:36:41 +00006849 {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00006850 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6851 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
Jonathan Peyton45be4502015-08-11 21:36:41 +00006852 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6853 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006854#if OMPT_SUPPORT
Jonathan Peyton45be4502015-08-11 21:36:41 +00006855 , exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006856#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006857 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006858#if OMPT_SUPPORT
6859 *exit_runtime_p = NULL;
6860#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +00006861 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006862
Jim Cownie5e8470a2013-09-27 10:38:44 +00006863#if USE_ITT_BUILD
6864 if ( __itt_stack_caller_create_ptr ) {
6865 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
6866 }
6867#endif /* USE_ITT_BUILD */
6868 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6869
6870 return rc;
6871}
6872
6873#if OMP_40_ENABLED
6874void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006875__kmp_teams_master( int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00006876{
            6877    // This routine is called by all master threads in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006878 kmp_info_t *thr = __kmp_threads[ gtid ];
6879 kmp_team_t *team = thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006880 ident_t *loc = team->t.t_ident;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006881 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6882 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6883 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006884 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006885 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006886    // Launch the league of teams now, but do not let workers execute
6887 // (they hang on fork barrier until next parallel)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006888#if INCLUDE_SSC_MARKS
6889 SSC_MARK_FORKING();
6890#endif
6891 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +00006892 team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006893#if OMPT_SUPPORT
6894 (void *)thr->th.th_teams_microtask, // "unwrapped" task
6895#endif
6896 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +00006897 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6898 NULL );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006899#if INCLUDE_SSC_MARKS
6900 SSC_MARK_JOINING();
6901#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006902
Jonathan Peyton3fdf3292015-07-21 18:03:30 +00006903 // AC: last parameter "1" eliminates join barrier which won't work because
6904 // worker threads are in a fork barrier waiting for more parallel regions
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006905 __kmp_join_call( loc, gtid
6906#if OMPT_SUPPORT
6907 , fork_context_intel
6908#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006909 , 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006910}
6911
6912int
6913__kmp_invoke_teams_master( int gtid )
6914{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006915 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6916 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006917 #if KMP_DEBUG
6918 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6919 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6920 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006921 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6922 __kmp_teams_master( gtid );
6923 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006924 return 1;
6925}
6926#endif /* OMP_40_ENABLED */
6927
6928/* this sets the requested number of threads for the next parallel region
6929 * encountered by this team */
6930/* since this should be enclosed in the forkjoin critical section it
            6931 * should avoid race conditions with asymmetrical nested parallelism */
6932
6933void
6934__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6935{
6936 kmp_info_t *thr = __kmp_threads[gtid];
6937
6938 if( num_threads > 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006939 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006940}
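
/*
 * Typical call sequence (compiler generated; sketched here, the C entry point name
 * __kmpc_push_num_threads is the one this runtime exports for the num_threads clause):
 *
 *     #pragma omp parallel num_threads(4)
 *     { ... }
 *
 * lowers to something of the form
 *
 *     __kmpc_push_num_threads( &loc, gtid, 4 );   // stores 4 in th_set_nproc
 *     __kmpc_fork_call( &loc, argc, microtask, ... );
 *
 * so the request only affects the single parallel region that immediately follows.
 */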
6941
6942#if OMP_40_ENABLED
6943
6944/* this sets the requested number of teams for the teams region and/or
6945 * the number of threads for the next parallel region encountered */
6946void
6947__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6948{
6949 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006950 KMP_DEBUG_ASSERT(num_teams >= 0);
6951 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006952
6953 if( num_teams == 0 )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006954 num_teams = 1; // default number of teams is 1.
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006955    if( num_teams > __kmp_max_nth ) { // too many teams requested?
6956 if ( !__kmp_reserve_warn ) {
6957 __kmp_reserve_warn = 1;
6958 __kmp_msg(
6959 kmp_ms_warning,
6960 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6961 KMP_HNT( Unset_ALL_THREADS ),
6962 __kmp_msg_null
6963 );
6964 }
6965 num_teams = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006966 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006967 // Set number of teams (number of threads in the outer "parallel" of the teams)
6968 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6969
6970 // Remember the number of threads for inner parallel regions
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006971 if( num_threads == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006972 if( !TCR_4(__kmp_init_middle) )
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006973 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006974 num_threads = __kmp_avail_proc / num_teams;
6975 if( num_teams * num_threads > __kmp_max_nth ) {
            6976            // adjust num_threads w/o warning as it is not a user setting
6977 num_threads = __kmp_max_nth / num_teams;
6978 }
6979 } else {
6980 if( num_teams * num_threads > __kmp_max_nth ) {
6981 int new_threads = __kmp_max_nth / num_teams;
6982 if ( !__kmp_reserve_warn ) { // user asked for too many threads
6983 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
6984 __kmp_msg(
6985 kmp_ms_warning,
6986 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6987 KMP_HNT( Unset_ALL_THREADS ),
6988 __kmp_msg_null
6989 );
6990 }
6991 num_threads = new_threads;
6992 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006993 }
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006994 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006995}
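
/*
 * Worked example (illustrative numbers): assume __kmp_avail_proc and __kmp_max_nth
 * are both 64. Then
 *
 *     #pragma omp teams num_teams(8)               // no thread_limit clause
 *
 * arrives here with num_threads == 0 and ends up with num_threads = 64 / 8 = 8,
 * so th_teams_size is { nteams = 8, nth = 8 }. With an explicit thread_limit(16)
 * the product 8 * 16 = 128 exceeds __kmp_max_nth, so the per-team count is reduced
 * to 64 / 8 = 8 and the CantFormThrTeam warning is issued once.
 */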
6996
6997
6998//
6999// Set the proc_bind var to use in the following parallel region.
7000//
7001void
7002__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
7003{
7004 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007005 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007006}
7007
7008#endif /* OMP_40_ENABLED */
7009
7010/* Launch the worker threads into the microtask. */
7011
7012void
7013__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7014{
7015 kmp_info_t *this_thr = __kmp_threads[gtid];
7016
7017#ifdef KMP_DEBUG
7018 int f;
7019#endif /* KMP_DEBUG */
7020
7021 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007022 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007023 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7024 KMP_MB(); /* Flush all pending memory write invalidates. */
7025
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007026 team->t.t_construct = 0; /* no single directives seen yet */
7027 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007028
7029 /* Reset the identifiers on the dispatch buffer */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007030 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007031 if ( team->t.t_max_nproc > 1 ) {
7032 int i;
Jonathan Peyton067325f2016-05-31 19:01:15 +00007033 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007034 team->t.t_disp_buffer[ i ].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007035#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007036 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7037#endif
7038 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007039 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007040 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007041#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00007042 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7043#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007044 }
7045
7046 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007047 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007048
7049#ifdef KMP_DEBUG
7050 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7051 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7052 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7053 }
7054#endif /* KMP_DEBUG */
7055
7056 /* release the worker threads so they may begin working */
7057 __kmp_fork_barrier( gtid, 0 );
7058}
7059
7060
7061void
7062__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7063{
7064 kmp_info_t *this_thr = __kmp_threads[gtid];
7065
7066 KMP_DEBUG_ASSERT( team );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007067 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007068 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7069 KMP_MB(); /* Flush all pending memory write invalidates. */
7070
7071 /* Join barrier after fork */
7072
7073#ifdef KMP_DEBUG
7074 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7075 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7076 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7077 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7078 __kmp_print_structure();
7079 }
7080 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7081 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7082#endif /* KMP_DEBUG */
7083
7084 __kmp_join_barrier( gtid ); /* wait for everyone */
7085
7086 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007087 KMP_ASSERT( this_thr->th.th_team == team );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007088}
7089
7090
7091/* ------------------------------------------------------------------------ */
7092/* ------------------------------------------------------------------------ */
7093
7094#ifdef USE_LOAD_BALANCE
7095
7096//
            7097// Return the number of worker threads actively spinning in the hot team, if we
7098// are at the outermost level of parallelism. Otherwise, return 0.
7099//
7100static int
7101__kmp_active_hot_team_nproc( kmp_root_t *root )
7102{
7103 int i;
7104 int retval;
7105 kmp_team_t *hot_team;
7106
7107 if ( root->r.r_active ) {
7108 return 0;
7109 }
7110 hot_team = root->r.r_hot_team;
7111 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7112 return hot_team->t.t_nproc - 1; // Don't count master thread
7113 }
7114
7115 //
7116 // Skip the master thread - it is accounted for elsewhere.
7117 //
7118 retval = 0;
7119 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7120 if ( hot_team->t.t_threads[i]->th.th_active ) {
7121 retval++;
7122 }
7123 }
7124 return retval;
7125}
7126
7127//
7128// Perform an automatic adjustment to the number of
7129// threads used by the next parallel region.
7130//
7131static int
7132__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7133{
7134 int retval;
7135 int pool_active;
7136 int hot_team_active;
7137 int team_curr_active;
7138 int system_active;
7139
7140 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7141 root, set_nproc ) );
7142 KMP_DEBUG_ASSERT( root );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007143 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007144 KMP_DEBUG_ASSERT( set_nproc > 1 );
7145
7146 if ( set_nproc == 1) {
7147 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
7148 return 1;
7149 }
7150
7151 //
7152 // Threads that are active in the thread pool, active in the hot team
7153 // for this particular root (if we are at the outer par level), and
7154 // the currently executing thread (to become the master) are available
7155 // to add to the new team, but are currently contributing to the system
7156 // load, and must be accounted for.
7157 //
7158 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7159 hot_team_active = __kmp_active_hot_team_nproc( root );
7160 team_curr_active = pool_active + hot_team_active + 1;
7161
7162 //
7163 // Check the system load.
7164 //
7165 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7166 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7167 system_active, pool_active, hot_team_active ) );
7168
7169 if ( system_active < 0 ) {
7170 //
7171 // There was an error reading the necessary info from /proc,
7172 // so use the thread limit algorithm instead. Once we set
7173 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
7174 // we shouldn't wind up getting back here.
7175 //
7176 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7177 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7178
7179 //
7180 // Make this call behave like the thread limit algorithm.
7181 //
7182 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7183 : root->r.r_hot_team->t.t_nproc);
7184 if ( retval > set_nproc ) {
7185 retval = set_nproc;
7186 }
7187 if ( retval < KMP_MIN_NTH ) {
7188 retval = KMP_MIN_NTH;
7189 }
7190
7191 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7192 return retval;
7193 }
7194
7195 //
7196 // There is a slight delay in the load balance algorithm in detecting
7197 // new running procs. The real system load at this instant should be
            7198    // at least as large as the #active omp threads that are available to
7199 // add to the team.
7200 //
7201 if ( system_active < team_curr_active ) {
7202 system_active = team_curr_active;
7203 }
7204 retval = __kmp_avail_proc - system_active + team_curr_active;
7205 if ( retval > set_nproc ) {
7206 retval = set_nproc;
7207 }
7208 if ( retval < KMP_MIN_NTH ) {
7209 retval = KMP_MIN_NTH;
7210 }
7211
7212 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7213 return retval;
7214} // __kmp_load_balance_nproc()
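
/*
 * Worked example (illustrative numbers): with __kmp_avail_proc = 16, two threads
 * idling in the pool, a hot team with two active workers plus the calling master,
 * team_curr_active is 2 + 2 + 1 = 5. If the system load comes back as 12, then
 * retval = 16 - 12 + 5 = 9, so a request for set_nproc = 8 is granted in full
 * while a request for 12 is trimmed to 9.
 */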
7215
7216#endif /* USE_LOAD_BALANCE */
7217
Jim Cownie5e8470a2013-09-27 10:38:44 +00007218/* ------------------------------------------------------------------------ */
7219/* ------------------------------------------------------------------------ */
7220
7221/* NOTE: this is called with the __kmp_init_lock held */
7222void
7223__kmp_cleanup( void )
7224{
7225 int f;
7226
7227 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
7228
7229 if (TCR_4(__kmp_init_parallel)) {
7230#if KMP_HANDLE_SIGNALS
7231 __kmp_remove_signals();
7232#endif
7233 TCW_4(__kmp_init_parallel, FALSE);
7234 }
7235
7236 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007237#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007238 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007239#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton17078362015-09-10 19:22:07 +00007240 __kmp_cleanup_hierarchy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007241 TCW_4(__kmp_init_middle, FALSE);
7242 }
7243
7244 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
7245
7246 if (__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007247 __kmp_runtime_destroy();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007248 __kmp_init_serial = FALSE;
7249 }
7250
7251 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7252 if ( __kmp_root[ f ] != NULL ) {
7253 __kmp_free( __kmp_root[ f ] );
7254 __kmp_root[ f ] = NULL;
7255 }
7256 }
7257 __kmp_free( __kmp_threads );
            7258    // __kmp_threads and __kmp_root were allocated at once, as a single block, so there is no need
            7259    // to free __kmp_root separately.
7260 __kmp_threads = NULL;
7261 __kmp_root = NULL;
7262 __kmp_threads_capacity = 0;
7263
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007264#if KMP_USE_DYNAMIC_LOCK
7265 __kmp_cleanup_indirect_user_locks();
7266#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00007267 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007268#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007269
Alp Toker98758b02014-03-02 04:12:06 +00007270 #if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00007271 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7272 __kmp_cpuinfo_file = NULL;
Alp Toker98758b02014-03-02 04:12:06 +00007273 #endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007274
7275 #if KMP_USE_ADAPTIVE_LOCKS
7276 #if KMP_DEBUG_ADAPTIVE_LOCKS
7277 __kmp_print_speculative_stats();
7278 #endif
7279 #endif
7280 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7281 __kmp_nested_nth.nth = NULL;
7282 __kmp_nested_nth.size = 0;
7283 __kmp_nested_nth.used = 0;
7284
7285 __kmp_i18n_catclose();
7286
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007287#if KMP_STATS_ENABLED
7288 __kmp_accumulate_stats_at_exit();
7289 __kmp_stats_list.deallocate();
7290#endif
7291
Jim Cownie5e8470a2013-09-27 10:38:44 +00007292 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
7293}
7294
7295/* ------------------------------------------------------------------------ */
7296/* ------------------------------------------------------------------------ */
7297
7298int
7299__kmp_ignore_mppbeg( void )
7300{
7301 char *env;
7302
7303 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7304 if (__kmp_str_match_false( env ))
7305 return FALSE;
7306 }
7307 // By default __kmpc_begin() is no-op.
7308 return TRUE;
7309}
7310
7311int
7312__kmp_ignore_mppend( void )
7313{
7314 char *env;
7315
7316 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7317 if (__kmp_str_match_false( env ))
7318 return FALSE;
7319 }
7320 // By default __kmpc_end() is no-op.
7321 return TRUE;
7322}
7323
7324void
7325__kmp_internal_begin( void )
7326{
7327 int gtid;
7328 kmp_root_t *root;
7329
7330 /* this is a very important step as it will register new sibling threads
7331 * and assign these new uber threads a new gtid */
7332 gtid = __kmp_entry_gtid();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007333 root = __kmp_threads[ gtid ]->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007334 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7335
7336 if( root->r.r_begin ) return;
7337 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7338 if( root->r.r_begin ) {
7339 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7340 return;
7341 }
7342
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007343 root->r.r_begin = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007344
7345 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7346}
7347
7348
7349/* ------------------------------------------------------------------------ */
7350/* ------------------------------------------------------------------------ */
7351
7352void
7353__kmp_user_set_library (enum library_type arg)
7354{
7355 int gtid;
7356 kmp_root_t *root;
7357 kmp_info_t *thread;
7358
7359 /* first, make sure we are initialized so we can get our gtid */
7360
7361 gtid = __kmp_entry_gtid();
7362 thread = __kmp_threads[ gtid ];
7363
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007364 root = thread->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007365
7366 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7367 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
7368 KMP_WARNING( SetLibraryIncorrectCall );
7369 return;
7370 }
7371
7372 switch ( arg ) {
7373 case library_serial :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007374 thread->th.th_set_nproc = 0;
7375 set__nproc( thread, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007376 break;
7377 case library_turnaround :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007378 thread->th.th_set_nproc = 0;
7379 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007380 break;
7381 case library_throughput :
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007382 thread->th.th_set_nproc = 0;
7383 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007384 break;
7385 default:
7386 KMP_FATAL( UnknownLibraryType, arg );
7387 }
7388
7389 __kmp_aux_set_library ( arg );
7390}
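
/*
 * Illustrative use (the entry point names are the documented extension API, assumed
 * here rather than defined in this file): applications normally reach this routine
 * through kmp_set_library(), kmp_set_library_serial(), kmp_set_library_turnaround()
 * or kmp_set_library_throughput(), or through the KMP_LIBRARY environment variable.
 *
 *     // Sketch: switch to turnaround mode before a latency-sensitive phase.
 *     extern void kmp_set_library_turnaround( void );
 *     ...
 *     kmp_set_library_turnaround();
 *     #pragma omp parallel
 *     { ... }
 */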
7391
7392void
7393__kmp_aux_set_stacksize( size_t arg )
7394{
7395 if (! __kmp_init_serial)
7396 __kmp_serial_initialize();
7397
7398#if KMP_OS_DARWIN
7399 if (arg & (0x1000 - 1)) {
7400 arg &= ~(0x1000 - 1);
7401 if(arg + 0x1000) /* check for overflow if we round up */
7402 arg += 0x1000;
7403 }
7404#endif
7405 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7406
7407 /* only change the default stacksize before the first parallel region */
7408 if (! TCR_4(__kmp_init_parallel)) {
7409 size_t value = arg; /* argument is in bytes */
7410
7411 if (value < __kmp_sys_min_stksize )
7412 value = __kmp_sys_min_stksize ;
7413 else if (value > KMP_MAX_STKSIZE)
7414 value = KMP_MAX_STKSIZE;
7415
7416 __kmp_stksize = value;
7417
7418 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7419 }
7420
7421 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7422}
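
/*
 * Worked example of the Darwin rounding above: a request of 1,000,000 bytes is not
 * page aligned, so it is truncated to 0xF4000 (999,424) and then bumped by one
 * 4 KiB page to 0xF5000 (1,003,520) before the min/max clamping. Callers usually
 * arrive here via the kmp_set_stacksize_s() extension or the KMP_STACKSIZE
 * variable (both assumed from the extension API, not defined in this file).
 */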
7423
7424/* set the behaviour of the runtime library */
7425/* TODO this can cause some odd behaviour with sibling parallelism... */
7426void
7427__kmp_aux_set_library (enum library_type arg)
7428{
7429 __kmp_library = arg;
7430
7431 switch ( __kmp_library ) {
7432 case library_serial :
7433 {
7434 KMP_INFORM( LibraryIsSerial );
7435 (void) __kmp_change_library( TRUE );
7436 }
7437 break;
7438 case library_turnaround :
7439 (void) __kmp_change_library( TRUE );
7440 break;
7441 case library_throughput :
7442 (void) __kmp_change_library( FALSE );
7443 break;
7444 default:
7445 KMP_FATAL( UnknownLibraryType, arg );
7446 }
7447}
7448
7449/* ------------------------------------------------------------------------ */
7450/* ------------------------------------------------------------------------ */
7451
7452void
7453__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
7454{
7455 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007456#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007457 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007458#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007459 int bt_set;
7460
7461 __kmp_save_internal_controls( thread );
7462
7463 /* Normalize and set blocktime for the teams */
7464 if (blocktime < KMP_MIN_BLOCKTIME)
7465 blocktime = KMP_MIN_BLOCKTIME;
7466 else if (blocktime > KMP_MAX_BLOCKTIME)
7467 blocktime = KMP_MAX_BLOCKTIME;
7468
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007469 set__blocktime_team( thread->th.th_team, tid, blocktime );
7470 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007471
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007472#if KMP_USE_MONITOR
Jim Cownie5e8470a2013-09-27 10:38:44 +00007473 /* Calculate and set blocktime intervals for the teams */
7474 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7475
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007476 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7477 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007478#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007479
            7480    /* Record that blocktime has been explicitly set */
7481 bt_set = TRUE;
7482
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007483 set__bt_set_team( thread->th.th_team, tid, bt_set );
7484 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007485 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d"
7486#if KMP_USE_MONITOR
7487 ", bt_intervals=%d, monitor_updates=%d"
7488#endif
7489 "\n",
7490 __kmp_gtid_from_tid(tid, thread->th.th_team), thread->th.th_team->t.t_id, tid, blocktime
7491#if KMP_USE_MONITOR
7492 , bt_intervals, __kmp_monitor_wakeups
7493#endif
7494 ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007495}
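
/*
 * Illustrative use (kmp_set_blocktime() and KMP_BLOCKTIME are assumed from the
 * extension API, not defined in this file): a program that alternates OpenMP phases
 * with long serial phases may prefer workers to suspend immediately at the end of
 * each parallel region:
 *
 *     kmp_set_blocktime( 0 );      // 0 ms busy-wait window; workers sleep at once
 *     #pragma omp parallel
 *     { ... }
 *
 * whereas the default keeps workers spinning briefly so that back-to-back regions
 * start faster.
 */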
7496
7497void
7498__kmp_aux_set_defaults(
7499 char const * str,
7500 int len
7501) {
7502 if ( ! __kmp_init_serial ) {
7503 __kmp_serial_initialize();
7504 };
7505 __kmp_env_initialize( str );
7506
7507 if (__kmp_settings
7508#if OMP_40_ENABLED
7509 || __kmp_display_env || __kmp_display_env_verbose
7510#endif // OMP_40_ENABLED
7511 ) {
7512 __kmp_env_print();
7513 }
7514} // __kmp_aux_set_defaults
7515
7516/* ------------------------------------------------------------------------ */
7517
7518/*
7519 * internal fast reduction routines
7520 */
7521
Jim Cownie5e8470a2013-09-27 10:38:44 +00007522PACKED_REDUCTION_METHOD_T
7523__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7524 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7525 kmp_critical_name *lck )
7526{
7527
7528 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
7529 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
7530 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
            7531    // Finally, it's up to the OpenMP RTL to decide which method to select among those generated by PAROPT.
7532
7533 PACKED_REDUCTION_METHOD_T retval;
7534
7535 int team_size;
7536
7537 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
7538 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
7539
7540 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7541 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7542
7543 retval = critical_reduce_block;
7544
            7545    team_size = __kmp_get_team_num_threads( global_tid ); // another way of getting the team size ( with 1 dynamic dereference ) is slower
7546
7547 if( team_size == 1 ) {
7548
7549 retval = empty_reduce_block;
7550
7551 } else {
7552
7553 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7554 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7555
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007556 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007557
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007558 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jonathan Peyton91b78702015-06-08 19:39:07 +00007559
7560 int teamsize_cutoff = 4;
7561
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007562#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7563 if( __kmp_mic_type != non_mic ) {
7564 teamsize_cutoff = 8;
7565 }
7566#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007567 if( tree_available ) {
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007568 if( team_size <= teamsize_cutoff ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007569 if ( atomic_available ) {
7570 retval = atomic_reduce_block;
7571 }
7572 } else {
7573 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7574 }
7575 } else if ( atomic_available ) {
7576 retval = atomic_reduce_block;
7577 }
7578 #else
7579 #error "Unknown or unsupported OS"
Joerg Sonnenberger1564f3c2015-09-21 20:02:45 +00007580 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007581
Andrey Churbanovcbda8682015-01-13 14:43:35 +00007582 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH
Jim Cownie5e8470a2013-09-27 10:38:44 +00007583
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007584 #if KMP_OS_LINUX || KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00007585
Jim Cownie5e8470a2013-09-27 10:38:44 +00007586 // basic tuning
7587
7588 if( atomic_available ) {
7589 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7590 retval = atomic_reduce_block;
7591 }
7592 } // otherwise: use critical section
7593
7594 #elif KMP_OS_DARWIN
7595
Jim Cownie5e8470a2013-09-27 10:38:44 +00007596 if( atomic_available && ( num_vars <= 3 ) ) {
7597 retval = atomic_reduce_block;
7598 } else if( tree_available ) {
7599 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7600 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7601 }
7602 } // otherwise: use critical section
7603
7604 #else
7605 #error "Unknown or unsupported OS"
7606 #endif
7607
7608 #else
7609 #error "Unknown or unsupported architecture"
7610 #endif
7611
7612 }
7613
Jim Cownie5e8470a2013-09-27 10:38:44 +00007614 // KMP_FORCE_REDUCTION
7615
Andrey Churbanovec23a952015-08-17 10:12:12 +00007616 // If the team is serialized (team_size == 1), ignore the forced reduction
7617 // method and stay with the unsynchronized method (empty_reduce_block)
7618 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007619
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007620 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007621
7622 int atomic_available, tree_available;
7623
7624 switch( ( forced_retval = __kmp_force_reduction_method ) )
7625 {
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007626 case critical_reduce_block:
Jim Cownie5e8470a2013-09-27 10:38:44 +00007627 KMP_ASSERT( lck ); // lck should be != 0
Jim Cownie5e8470a2013-09-27 10:38:44 +00007628 break;
7629
7630 case atomic_reduce_block:
7631 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007632 if( ! atomic_available ) {
7633 KMP_WARNING(RedMethodNotSupported, "atomic");
7634 forced_retval = critical_reduce_block;
7635 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007636 break;
7637
7638 case tree_reduce_block:
7639 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton5c32d5e2016-09-02 18:29:45 +00007640 if( ! tree_available ) {
7641 KMP_WARNING(RedMethodNotSupported, "tree");
7642 forced_retval = critical_reduce_block;
7643 } else {
7644 #if KMP_FAST_REDUCTION_BARRIER
7645 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7646 #endif
7647 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007648 break;
7649
7650 default:
7651 KMP_ASSERT( 0 ); // "unsupported method specified"
7652 }
7653
7654 retval = forced_retval;
7655 }
7656
7657 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7658
7659 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7660 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7661
7662 return ( retval );
7663}
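
/*
 * Example of how the inputs above are produced (compiler generated; sketched here):
 * for
 *
 *     #pragma omp parallel for reduction(+:sum)
 *
 * the compiler passes an ident_t whose flags may include KMP_IDENT_ATOMIC_REDUCE
 * together with a reduce_data block and a reduce_func callback, which is exactly
 * what the two FAST_REDUCTION_*_METHOD_GENERATED tests inspect. On an x86_64 Linux
 * host with both methods available, a 4-thread team therefore picks
 * atomic_reduce_block (team_size <= teamsize_cutoff), while a 32-thread team
 * switches to TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER.
 */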
7664
7665// this function is for testing set/get/determine reduce method
7666kmp_int32
7667__kmp_get_reduce_method( void ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007668 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007669}
7670
7671/* ------------------------------------------------------------------------ */