/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "omp.h"        /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_error.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* flags will be used in future, e.g., to implement */
/* openmp_strict library restrictions */

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void
__kmpc_begin(ident_t *loc, kmp_int32 flags)
{
    // By default __kmp_ignore_mppbeg() returns TRUE.
    if (__kmp_ignore_mppbeg() == FALSE) {
        __kmp_internal_begin();

        KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
    }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This call is also optional; even if made, it
 * does nothing unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
 */
void
__kmpc_end(ident_t *loc)
{
    // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end() call a no-op.
    // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
    // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
    // will unregister this root (which can cause library shutdown).
    if (__kmp_ignore_mppend() == FALSE) {
        KC_TRACE( 10, ("__kmpc_end: called\n" ) );
        KA_TRACE( 30, ("__kmpc_end\n" ));

        __kmp_internal_end_thread( -1 );
    }
}
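
/* A minimal sketch (not from this file) of how a program might drive these
   optional entry points; `main` and the ident_t initializer are hypothetical
   and abbreviated:

       static ident_t loc = {0};        // compiler-generated source location

       int main() {
           __kmpc_begin(&loc, 0);       // optional; the runtime also initializes lazily
           // ... code using OpenMP constructs ...
           __kmpc_end(&loc);            // a no-op unless KMP_IGNORE_MPPEND=0
           return 0;
       }
*/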

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32
__kmpc_global_thread_num(ident_t *loc)
{
    kmp_int32 gtid = __kmp_entry_gtid();

    KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );

    return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime.

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime. That is
not a number that can be determined by any OpenMP standard calls, since the library may be
called from more than one non-OpenMP thread, and this reflects the total over all such calls.
Similarly, the runtime maintains underlying threads even when they are not active (since the cost
of creating and destroying OS threads is high); this call counts all such threads even if they are not
waiting for work.
*/
kmp_int32
__kmpc_global_num_threads(ident_t *loc)
{
    KC_TRACE(10,("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

    return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel construct.
*/
kmp_int32
__kmpc_bound_thread_num(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
    return __kmp_tid_from_gtid( __kmp_entry_gtid() );
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32
__kmpc_bound_num_threads(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );

    return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32
__kmpc_ok_to_fork(ident_t *loc)
{
#ifndef KMP_DEBUG

    return TRUE;

#else

    const char *semi2;
    const char *semi3;
    int line_no;

    if (__kmp_par_range == 0) {
        return TRUE;
    }
    semi2 = loc->psource;
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2 + 1, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    if (__kmp_par_range_filename[0]) {
        const char *name = semi2 - 1;
        while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
            name--;
        }
        if ((*name == '/') || (*name == ';')) {
            name++;
        }
        if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
            return __kmp_par_range < 0;
        }
    }
    semi3 = strchr(semi2 + 1, ';');
    if (__kmp_par_range_routine[0]) {
        if ((semi3 != NULL) && (semi3 > semi2)
          && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
            return __kmp_par_range < 0;
        }
    }
    if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
        if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
            return __kmp_par_range > 0;
        }
        return __kmp_par_range < 0;
    }
    return TRUE;

#endif /* KMP_DEBUG */

}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if not.
*/
kmp_int32
__kmpc_in_parallel( ident_t *loc )
{
    return __kmp_entry_thread() -> th.th_root -> r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void
__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
      global_tid, num_threads ) );

    __kmp_push_num_threads( loc, global_tid, num_threads );
}
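
/* A sketch (hedged; `outlined_body` is a hypothetical outlined microtask) of
   how a `num_threads` clause is lowered onto this entry point. For

       #pragma omp parallel num_threads(4)
       { body(); }

   the compiler emits, roughly:

       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_push_num_threads(&loc, gtid, 4);
       __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);

   The pushed value applies only to the next fork by this thread and is
   popped automatically (see __kmpc_pop_num_threads below).
*/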

void
__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
{
    KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );

    /* the num_threads are automatically popped */
}


#if OMP_40_ENABLED

void
__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
{
    KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
      global_tid, proc_bind ) );

    __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
}

#endif /* OMP_40_ENABLED */


/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void
__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
    int inParallel = __kmpc_in_parallel(loc);
    if (inParallel)
    {
        KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
    }
    else
    {
        KMP_COUNT_BLOCK(OMP_PARALLEL);
    }
#endif

    // maybe saving thr_state is enough here
    {
        va_list ap;
        va_start( ap, microtask );

#if OMPT_SUPPORT
        ompt_frame_t* ompt_frame;
        if (ompt_enabled) {
            kmp_info_t *master_th = __kmp_threads[ gtid ];
            kmp_team_t *parent_team = master_th->th.th_team;
            ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
            if (lwt)
                ompt_frame = &(lwt->ompt_task_info.frame);
            else
            {
                int tid = __kmp_tid_from_gtid( gtid );
                ompt_frame = &(parent_team->t.t_implicit_task_taskdata[tid].
                    ompt_task_info.frame);
            }
            ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
        }
#endif

#if INCLUDE_SSC_MARKS
        SSC_MARK_FORKING();
#endif
        __kmp_fork_call( loc, gtid, fork_context_intel,
                argc,
#if OMPT_SUPPORT
                VOLATILE_CAST(void *) microtask,      // "unwrapped" task
#endif
                VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                VOLATILE_CAST(launch_t)    __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                &ap
#else
                ap
#endif
                );
#if INCLUDE_SSC_MARKS
        SSC_MARK_JOINING();
#endif
        __kmp_join_call( loc, gtid
#if OMPT_SUPPORT
            , fork_context_intel
#endif
        );

        va_end( ap );

    }
}
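
/* A sketch of the calling convention this entry point expects (simplified;
   `outlined` and `use` are hypothetical names). Per the kmpc_micro typedef,
   the outlined routine receives the global and bound thread ids first,
   followed by the shared-variable pointers passed through the ellipsis:

       void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *x) { use(x); }

       // lowering of:  int x;  #pragma omp parallel shared(x)  { use(&x); }
       __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x);

   Note that argc counts only the ellipsis arguments (here 1, for &x).
*/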

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void
__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
      global_tid, num_teams, num_threads ) );

    __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void
__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    int gtid = __kmp_entry_gtid();
    kmp_info_t *this_thr = __kmp_threads[ gtid ];
    va_list ap;
    va_start( ap, microtask );

    KMP_COUNT_BLOCK(OMP_TEAMS);

    // remember teams entry point and nesting level
    this_thr->th.th_teams_microtask = microtask;
    this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
    kmp_team_t *parent_team = this_thr->th.th_team;
    int tid = __kmp_tid_from_gtid( gtid );
    if (ompt_enabled) {
        parent_team->t.t_implicit_task_taskdata[tid].
            ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1);
    }
#endif

    // check if __kmpc_push_num_teams called, set default number of teams otherwise
    if ( this_thr->th.th_teams_size.nteams == 0 ) {
        __kmp_push_num_teams( loc, gtid, 0, 0 );
    }
    KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

    __kmp_fork_call( loc, gtid, fork_context_intel,
            argc,
#if OMPT_SUPPORT
            VOLATILE_CAST(void *) microtask,               // "unwrapped" task
#endif
            VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
            VOLATILE_CAST(launch_t)    __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            &ap
#else
            ap
#endif
            );
    __kmp_join_call( loc, gtid
#if OMPT_SUPPORT
        , fork_context_intel
#endif
    );

    this_thr->th.th_teams_microtask = NULL;
    this_thr->th.th_teams_level = 0;
    *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
    va_end( ap );
}
#endif /* OMP_40_ENABLED */
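
/* A sketch (assumptions: `outlined_teams` is a hypothetical outlined region,
   and the clause-to-argument mapping follows the documentation above) of how
   a teams construct with clauses reaches these entry points. For

       #pragma omp teams num_teams(8) thread_limit(4)
       { body(); }

   the compiler emits, roughly:

       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_push_num_teams(&loc, gtid, 8, 4);
       __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams);

   Without the clauses the push call is omitted, and __kmpc_fork_teams
   supplies defaults via __kmp_push_num_teams(loc, gtid, 0, 0).
*/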


//
// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
//
int
__kmpc_invoke_task_func( int gtid )
{
    return __kmp_invoke_task_func( gtid );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    // The implementation is now in kmp_runtime.cpp so that it can share static
    // functions with kmp_fork_call since the tasks to be done are similar in
    // each case.
    __kmp_serialized_parallel(loc, global_tid);
}
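
/* A sketch of the conditional lowering this pairs with (control flow
   simplified; `outlined` is a hypothetical outlined function):

       if (condition) {
           __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
       } else {
           kmp_int32 gtid = __kmpc_global_thread_num(&loc);
           kmp_int32 btid = 0;
           __kmpc_serialized_parallel(&loc, gtid);
           outlined(&gtid, &btid);      // region runs on the encountering thread
           __kmpc_end_serialized_parallel(&loc, gtid);
       }
*/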

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void
__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_internal_control_t *top;
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );

    /* skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    // Not autopar code
    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    #if OMP_45_ENABLED
    kmp_task_team_t * task_team = this_thr->th.th_task_team;

    // we need to wait for the proxy tasks before finishing the thread
    if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
        __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL) ); // is an ITT object needed here?
    #endif

    KMP_MB();
    KMP_DEBUG_ASSERT( serial_team );
    KMP_ASSERT(       serial_team -> t.t_serialized );
    KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
    KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );

    /* If necessary, pop the internal control stack values and replace the team values */
    top = serial_team -> t.t_control_stack_top;
    if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
        copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
        serial_team -> t.t_control_stack_top = top -> next;
        __kmp_free(top);
    }

    //if( serial_team -> t.t_serialized > 1 )
    serial_team -> t.t_level--;

    /* pop dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
    {
        dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer =
            serial_team->t.t_dispatch->th_disp_buffer->next;
        __kmp_free( disp_buffer );
    }

    -- serial_team -> t.t_serialized;
    if ( serial_team -> t.t_serialized == 0 ) {

        /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
        if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
            __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
        }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

        this_thr -> th.th_team           = serial_team -> t.t_parent;
        this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;

        /* restore values cached in the thread */
        this_thr -> th.th_team_nproc      = serial_team -> t.t_parent -> t.t_nproc;      /* JPH */
        this_thr -> th.th_team_master     = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
        this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;

        /* TODO the below shouldn't need to be adjusted for serialized teams */
        this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
            t.t_dispatch[ serial_team -> t.t_master_tid ];

        __kmp_pop_current_task_from_thread( this_thr );

        KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
        this_thr -> th.th_current_task -> td_flags.executing = 1;

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            // Copy the task team from the new child / old parent team to the thread.
            this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
                            global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
        }
    } else {
        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
                            global_tid, serial_team, serial_team -> t.t_serialized ) );
        }
    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_parallel( global_tid, NULL );
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void
__kmpc_flush(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_flush: called\n" ) );

    /* need explicit __mf() here since we use volatile instead in the library */
    KMP_MB();       /* Flush all pending memory write invalidates. */

    #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
        #if KMP_MIC
            // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
            // We shouldn't need it, though, since the ABI rules require that
            // * If the compiler generates NGO stores it also generates the fence
            // * If users hand-code NGO stores they should insert the fence
            // therefore no incomplete unordered stores should be visible.
        #else
            // C74404
            // This is to address non-temporal store instructions (sfence needed).
            // The clflush instruction is addressed as well (mfence needed).
            // Probably the non-temporal load movntdqa instruction should also be addressed.
            // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
            if ( ! __kmp_cpuinfo.initialized ) {
                __kmp_query_cpuid( & __kmp_cpuinfo );
            }; // if
            if ( ! __kmp_cpuinfo.sse2 ) {
                // CPU cannot execute SSE2 instructions.
            } else {
                #if KMP_COMPILER_ICC
                _mm_mfence();
                #elif KMP_COMPILER_MSVC
                MemoryBarrier();
                #else
                __sync_synchronize();
                #endif // KMP_COMPILER_ICC
            }; // if
        #endif // KMP_MIC
    #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
        // Nothing to see here, move along
    #elif KMP_ARCH_PPC64
        // Nothing needed here (we have a real MB above).
        #if KMP_OS_CNK
            // The flushing thread needs to yield here; this prevents a
            // busy-waiting thread from saturating the pipeline. flush is
            // often used in loops like this:
            //   while (!flag) {
            //     #pragma omp flush(flag)
            //   }
            // and adding the yield here is good for at least a 10x speedup
            // when running >2 threads per core (on the NAS LU benchmark).
            __kmp_yield(TRUE);
        #endif
    #else
        #error Unknown or unsupported architecture
    #endif

}

/* -------------------------------------------------------------------------- */

/* -------------------------------------------------------------------------- */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void
__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_BARRIER);
    KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {
        if ( loc == 0 ) {
            KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
        }; // if

        __kmp_check_barrier( global_tid, ct_barrier, loc );
    }

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_frame_t * ompt_frame;
    if (ompt_enabled ) {
        ompt_frame = __ompt_get_task_frame_internal(0);
        if ( ompt_frame->reenter_runtime_frame == NULL )
            ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
    }
#endif
    __kmp_threads[ global_tid ]->th.th_ident = loc;
    // TODO: explicit barrier_wait_id:
    //   this function is called when 'barrier' directive is present or
    //   implicit barrier at the end of a worksharing construct.
    // 1) better to add a per-thread barrier counter to a thread data structure
    // 2) set to 0 when a new team is created
    // 3) no sync is required

    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled ) {
        ompt_frame->reenter_runtime_frame = NULL;
    }
#endif
}
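
/* For reference, the mapping onto this entry point is direct: a sketch of

       #pragma omp barrier

   is simply

       __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));

   The same call is emitted for the implicit barrier that ends a worksharing
   construct, unless a `nowait` clause removes it.
*/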

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32
__kmpc_master(ident_t *loc, kmp_int32 global_tid)
{
    int status = 0;

    KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    if( KMP_MASTER_GTID( global_tid )) {
        KMP_COUNT_BLOCK(OMP_MASTER);
        KMP_PUSH_PARTITIONED_TIMER(OMP_master);
        status = 1;
    }

#if OMPT_SUPPORT && OMPT_TRACE
    if (status) {
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
            kmp_info_t *this_thr = __kmp_threads[ global_tid ];
            kmp_team_t *team     = this_thr -> th.th_team;

            int tid = __kmp_tid_from_gtid( global_tid );
            ompt_callbacks.ompt_callback(ompt_event_master_begin)(
                team->t.ompt_team_info.parallel_id,
                team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
        }
    }
#endif

    if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
#else
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL );
#endif
    }

    return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the thread
that executes the <tt>master</tt> region.
*/
void
__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
{
    KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );

    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
    KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team     = this_thr -> th.th_team;
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_master_end)) {
        int tid = __kmp_tid_from_gtid( global_tid );
        ompt_callbacks.ompt_callback(ompt_event_master_end)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif

    if ( __kmp_env_consistency_check ) {
        if( global_tid < 0 )
            KMP_WARNING( ThreadIdentInvalid );

        if( KMP_MASTER_GTID( global_tid ))
            __kmp_pop_sync( global_tid, ct_master, loc );
    }
}
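
/* A sketch of how the pair above is used (gtid obtained earlier; `work` is a
   hypothetical function). For

       #pragma omp master
       { work(); }

   the compiler emits, roughly:

       if (__kmpc_master(&loc, gtid)) {
           work();
           __kmpc_end_master(&loc, gtid);
       }

   Only the master thread takes the branch, and there is no implied barrier.
*/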

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

#if USE_ITT_BUILD
    __kmp_itt_ordered_prep( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        /* OMPT state update */
        th->th.ompt_thread_info.wait_id = (uint64_t) loc;
        th->th.ompt_thread_info.state = ompt_state_wait_ordered;

        /* OMPT event callback */
        if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

    if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
        (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_deo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        /* OMPT state update */
        th->th.ompt_thread_info.state = ompt_state_work_parallel;
        th->th.ompt_thread_info.wait_id = 0;

        /* OMPT event callback */
        if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

#if USE_ITT_BUILD
    __kmp_itt_ordered_start( gtid );
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;

    KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );

#if USE_ITT_BUILD
    __kmp_itt_ordered_end( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

    if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
        (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_dxo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
            th->th.ompt_thread_info.wait_id);
    }
#endif
}
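
/* A sketch of where this pair sits in a lowered ordered loop (the loop
   dispatch calls are omitted for brevity; `body` is hypothetical):

       // #pragma omp for ordered
       // for (i = lb; i <= ub; i++) { #pragma omp ordered { body(i); } }

       __kmpc_ordered(&loc, gtid);      // wait for logical iteration order
       body(i);
       __kmpc_end_ordered(&loc, gtid);  // release the next iteration

   The th_deo_fcn/th_dxo_fcn hooks above let schedule-specific dispatchers
   override the default __kmp_parallel_deo/__kmp_parallel_dxo protocol.
*/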

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
{
    // Pointer to the allocated indirect lock is written to crit, while indexing is ignored.
    void *idx;
    kmp_indirect_lock_t **lck;
    lck = (kmp_indirect_lock_t **)crit;
    kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
    KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
    KMP_SET_I_LOCK_LOCATION(ilk, loc);
    KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
    KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(ilk->lock, loc);
#endif
    int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
    if (status == 0) {
#if USE_ITT_BUILD
        __kmp_itt_critical_destroyed(ilk->lock);
#endif
        // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit.
        //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
    }
    KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) {                                             \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                        \
    if (l->lk.poll != KMP_LOCK_FREE(tas) ||                                            \
        ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
        kmp_uint32 spins;                                                              \
        KMP_FSYNC_PREPARE(l);                                                          \
        KMP_INIT_YIELD(spins);                                                         \
        if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {  \
            KMP_YIELD(TRUE);                                                           \
        } else {                                                                       \
            KMP_YIELD_SPIN(spins);                                                     \
        }                                                                              \
        kmp_backoff_t backoff = __kmp_spin_backoff_params;                             \
        while (l->lk.poll != KMP_LOCK_FREE(tas) ||                                     \
               ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
            __kmp_spin_backoff(&backoff);                                              \
            if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
                KMP_YIELD(TRUE);                                                       \
            } else {                                                                   \
                KMP_YIELD_SPIN(spins);                                                 \
            }                                                                          \
        }                                                                              \
    }                                                                                  \
    KMP_FSYNC_ACQUIRED(l);                                                             \
}

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) {                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                        \
    rc = l->lk.poll == KMP_LOCK_FREE(tas) &&                                           \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
}

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) {                                             \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas));                      \
    KMP_MB();                                                                          \
}

#if KMP_USE_FUTEX

# include <unistd.h>
# include <sys/syscall.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE 1
# endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) {                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                  \
    kmp_int32 gtid_code = (gtid+1) << 1;                                               \
    KMP_MB();                                                                          \
    KMP_FSYNC_PREPARE(ftx);                                                            \
    kmp_int32 poll_val;                                                                \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                                   KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                                 \
        if (!cond) {                                                                   \
            if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
                continue;                                                              \
            }                                                                          \
            poll_val |= KMP_LOCK_BUSY(1, futex);                                       \
        }                                                                              \
        kmp_int32 rc;                                                                  \
        if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
            continue;                                                                  \
        }                                                                              \
        gtid_code |= 1;                                                                \
    }                                                                                  \
    KMP_FSYNC_ACQUIRED(ftx);                                                           \
}

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) {                                          \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                  \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex))) { \
        KMP_FSYNC_ACQUIRED(ftx);                                                       \
        rc = TRUE;                                                                     \
    } else {                                                                           \
        rc = FALSE;                                                                    \
    }                                                                                  \
}

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) {                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                  \
    KMP_MB();                                                                          \
    KMP_FSYNC_RELEASING(ftx);                                                          \
    kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));      \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                                \
        syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    }                                                                                  \
    KMP_MB();                                                                          \
    KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
}

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p
__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
{
    kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

    //
    // Because of the double-check, the following load
    // doesn't need to be volatile.
    //
    kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );

    if ( lck == NULL ) {
        void * idx;

        // Allocate & initialize the lock.
        // Remember allocated locks in table in order to free them in __kmp_cleanup()
        lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
        __kmp_init_user_lock_with_checks( lck );
        __kmp_set_user_lock_location( lck, loc );
#if USE_ITT_BUILD
        __kmp_itt_critical_creating( lck );
        // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
        // lock. It is the only place where we can guarantee it. There are chances the lock will be
        // destroyed with no usage, but it is not a problem, because this is not a real event seen
        // by the user but rather setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

        //
        // Use a cmpxchg instruction to slam the start of the critical
        // section with the lock pointer. If another thread beat us
        // to it, deallocate the lock, and use the lock that the other
        // thread allocated.
        //
        int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );

        if ( status == 0 ) {
            // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
            __kmp_itt_critical_destroyed( lck );
            // Let ITT know the lock is destroyed and the same memory location may be reused for
            // another purpose.
#endif /* USE_ITT_BUILD */
            __kmp_destroy_user_lock_with_checks( lck );
            __kmp_user_lock_free( &idx, gtid, lck );
            lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
            KMP_DEBUG_ASSERT( lck != NULL );
        }
    }
    return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void
__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
{
#if KMP_USE_DYNAMIC_LOCK
    __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
    KMP_COUNT_BLOCK(OMP_CRITICAL);
    KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

    //TODO: add THR_OVHD_STATE

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_USE_FUTEX
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    /* since the critical directive binds to all threads, not just
     * the current team we have to check this even if we are in a
     * serialized team */
    /* also, even if we are the uber thread, we still have to conduct the lock,
     * as we have to contend with sibling threads */

#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_acquire_user_lock_with_checks( lck, global_tid );

#if USE_ITT_BUILD
    __kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */

    KMP_START_EXPLICIT_TIMER(OMP_critical);
    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
#endif // KMP_USE_DYNAMIC_LOCK
}
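
/* A sketch of the lowering this supports: each critical section name gets a
   statically allocated kmp_critical_name (zero-initialized storage the
   runtime may install a lock, or lock tag, into), shared by all uses of that
   name. The variable name below is hypothetical:

       static kmp_critical_name crit_foo;

       // #pragma omp critical (foo)
       __kmpc_critical(&loc, gtid, &crit_foo);
       // ... protected code ...
       __kmpc_end_critical(&loc, gtid, &crit_foo);

   Because `crit` starts as zero, the first thread through initializes it
   (see __kmp_get_critical_section_ptr / __kmp_init_indirect_csptr) and later
   threads reuse the same lock.
*/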

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t
__kmp_map_hint_to_lock(uintptr_t hint)
{
#if KMP_USE_TSX
# define KMP_TSX_LOCK(seq) lockseq_##seq
#else
# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
# define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
# define KMP_CPUINFO_RTM 0
#endif

    // Hints that do not require further logic
    if (hint & kmp_lock_hint_hle)
        return KMP_TSX_LOCK(hle);
    if (hint & kmp_lock_hint_rtm)
        return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
    if (hint & kmp_lock_hint_adaptive)
        return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

    // Rule out conflicting hints first by returning the default lock
    if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
        return __kmp_user_lock_seq;
    if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
        return __kmp_user_lock_seq;

    // Do not even consider speculation when it appears to be contended
    if (hint & omp_lock_hint_contended)
        return lockseq_queuing;

    // Uncontended lock without speculation
    if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
        return lockseq_tas;

    // HLE lock for speculation
    if (hint & omp_lock_hint_speculative)
        return KMP_TSX_LOCK(hle);

    return __kmp_user_lock_seq;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section,
or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation.
This function blocks until the executing thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void
__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
{
    KMP_COUNT_BLOCK(OMP_CRITICAL);
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

    kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
    // Check if it is initialized.
    if (*lk == 0) {
        kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
        if (KMP_IS_D_LOCK(lckseq)) {
            KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
        } else {
            __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
        }
    }
    // Branch for accessing the actual lock object and set operation. This branching is inevitable since
    // this lock initialization does not follow the normal dispatch path (lock table is not used).
    if (KMP_EXTRACT_D_TAG(lk) != 0) {
        lck = (kmp_user_lock_p)lk;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
# if KMP_USE_INLINED_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_INLINED_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
        KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
    }

#if USE_ITT_BUILD
    __kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */

    KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
} // __kmpc_critical_with_hint
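
/* A sketch of how an OpenMP 4.5 `hint` clause reaches this entry point
   (`crit_foo` as in the earlier sketch):

       // #pragma omp critical (foo) hint(omp_lock_hint_speculative)
       __kmpc_critical_with_hint(&loc, gtid, &crit_foo, omp_lock_hint_speculative);

   Hints are suggestions, not requirements: conflicting bits (for example
   contended|uncontended) make __kmp_map_hint_to_lock fall back to the
   default lock implementation.
*/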

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void
__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
{
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));

#if KMP_USE_DYNAMIC_LOCK
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        KMP_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
# if KMP_USE_INLINED_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_RELEASE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_INLINED_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        KMP_ASSERT(ilk != NULL);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
        KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_USE_FUTEX
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
    }

    KMP_ASSERT(lck != NULL);

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

#if USE_ITT_BUILD
    __kmp_itt_critical_releasing( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_release_user_lock_with_checks( lck, global_tid );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
        ompt_callbacks.ompt_callback(ompt_event_release_critical)(
            (uint64_t) lck);
    }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
    KMP_POP_PARTITIONED_TIMER();
    KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
}
1353
1354/*!
1355@ingroup SYNCHRONIZATION
1356@param loc source location information
1357@param global_tid thread id.
1358@return one if the thread should execute the master block, zero otherwise
1359
1360Start execution of a combined barrier and master. The barrier is executed inside this function.
1361*/
1362kmp_int32
1363__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1364{
1365 int status;
1366
1367 KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1368
1369 if (! TCR_4(__kmp_init_parallel))
1370 __kmp_parallel_initialize();
1371
1372 if ( __kmp_env_consistency_check )
1373 __kmp_check_barrier( global_tid, ct_barrier, loc );
1374
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001375#if USE_ITT_NOTIFY
1376 __kmp_threads[global_tid]->th.th_ident = loc;
1377#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001378 status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1379
1380 return (status != 0) ? 0 : 1;
1381}
1382
1383/*!
1384@ingroup SYNCHRONIZATION
1385@param loc source location information
1386@param global_tid thread id.
1387
1388Complete the execution of a combined barrier and master. This function should
1389only be called at the completion of the <tt>master</tt> code. Other threads will
1390still be waiting at the barrier and this call releases them.
1391*/
1392void
1393__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1394{
1395 KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1396
1397 __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1398}
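
/*
Illustrative lowering sketch (assumed, for orientation only): a combined
barrier/master construct tests the return value of __kmpc_barrier_master and
lets only the master thread execute the block before releasing the others.
@code
void lowered_barrier_master(ident_t *loc, kmp_int32 gtid) {
    if (__kmpc_barrier_master(loc, gtid)) {
        // ... master-only code ...
        __kmpc_end_barrier_master(loc, gtid); // releases the waiting threads
    }
}
@endcode
*/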
1399
1400/*!
1401@ingroup SYNCHRONIZATION
1402@param loc source location information
1403@param global_tid thread id.
1404@return one if the thread should execute the master block, zero otherwise
1405
1406Start execution of a combined barrier and master(nowait) construct.
1407The barrier is executed inside this function.
1408There is no equivalent "end" function: the barrier and the master bookkeeping are both completed inside this call, so no separate end marker is needed.
1409*/
1410kmp_int32
1411__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1412{
1413 kmp_int32 ret;
1414
1415 KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1416
1417 if (! TCR_4(__kmp_init_parallel))
1418 __kmp_parallel_initialize();
1419
1420 if ( __kmp_env_consistency_check ) {
1421 if ( loc == 0 ) {
1422 KMP_WARNING( ConstructIdentInvalid ); // TODO: clarify what an invalid construct ident means for the user
1423 }
1424 __kmp_check_barrier( global_tid, ct_barrier, loc );
1425 }
1426
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001427#if USE_ITT_NOTIFY
1428 __kmp_threads[global_tid]->th.th_ident = loc;
1429#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001430 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1431
1432 ret = __kmpc_master (loc, global_tid);
1433
1434 if ( __kmp_env_consistency_check ) {
1435 /* __kmpc_end_master is not called for this construct, so the */
1436 /* consistency-check actions of __kmpc_end_master are performed here */
1437
1438 if ( global_tid < 0 ) {
1439 KMP_WARNING( ThreadIdentInvalid );
1440 }
1441 if (ret) {
1442 /* only one thread should do the pop since only */
1443 /* one did the push (see __kmpc_master()) */
1444
1445 __kmp_pop_sync( global_tid, ct_master, loc );
1446 }
1447 }
1448
1449 return (ret);
1450}
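
/*
Illustrative sketch (assumed): for the nowait variant only the returned flag
is tested; no "end" call is emitted because the barrier and the master
bookkeeping are both completed inside __kmpc_barrier_master_nowait itself.
@code
void lowered_barrier_master_nowait(ident_t *loc, kmp_int32 gtid) {
    if (__kmpc_barrier_master_nowait(loc, gtid)) {
        // ... master-only code; no matching end call ...
    }
}
@endcode
*/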
1451
1452/* The BARRIER for a SINGLE process section is always explicit */
1453/*!
1454@ingroup WORK_SHARING
1455@param loc source location information
1456@param global_tid global thread number
1457@return One if this thread should execute the single construct, zero otherwise.
1458
1459Test whether to execute a <tt>single</tt> construct.
1460There are no implicit barriers in the two "single" calls; rather, the compiler should
1461introduce an explicit barrier if one is required.
1462*/
1463
1464kmp_int32
1465__kmpc_single(ident_t *loc, kmp_int32 global_tid)
1466{
1467 kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
Jonathan Peyton30138252016-03-03 21:21:05 +00001468
1469 if (rc) {
1470 // We are going to execute the single statement, so we should count it.
1471 KMP_COUNT_BLOCK(OMP_SINGLE);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001472 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001473 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001474
1475#if OMPT_SUPPORT && OMPT_TRACE
1476 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1477 kmp_team_t *team = this_thr -> th.th_team;
1478 int tid = __kmp_tid_from_gtid( global_tid );
1479
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001480 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001481 if (rc) {
1482 if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
1483 ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
1484 team->t.ompt_team_info.parallel_id,
1485 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
1486 team->t.ompt_team_info.microtask);
1487 }
1488 } else {
1489 if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
1490 ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
1491 team->t.ompt_team_info.parallel_id,
1492 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1493 }
1494 this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
1495 }
1496 }
1497#endif
1498
Jim Cownie5e8470a2013-09-27 10:38:44 +00001499 return rc;
1500}
1501
1502/*!
1503@ingroup WORK_SHARING
1504@param loc source location information
1505@param global_tid global thread number
1506
1507Mark the end of a <tt>single</tt> construct. This function should
1508only be called by the thread that executed the block of code protected
1509by the `single` construct.
1510*/
1511void
1512__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1513{
1514 __kmp_exit_single( global_tid );
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001515 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001516
1517#if OMPT_SUPPORT && OMPT_TRACE
1518 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1519 kmp_team_t *team = this_thr -> th.th_team;
1520 int tid = __kmp_tid_from_gtid( global_tid );
1521
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001522 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001523 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
1524 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
1525 team->t.ompt_team_info.parallel_id,
1526 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1527 }
1528#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001529}
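
/*
Illustrative lowering sketch (assumed): a "single" region tests __kmpc_single,
and only the winning thread calls __kmpc_end_single; the compiler appends an
explicit barrier afterwards unless the construct carries a nowait clause.
@code
void lowered_single_region(ident_t *loc, kmp_int32 gtid) {
    if (__kmpc_single(loc, gtid)) {
        // ... code executed by exactly one thread ...
        __kmpc_end_single(loc, gtid);
    }
    __kmpc_barrier(loc, gtid); // omitted when nowait is specified
}
@endcode
*/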
1530
1531/*!
1532@ingroup WORK_SHARING
1533@param loc Source location
1534@param global_tid Global thread id
1535
1536Mark the end of a statically scheduled loop.
1537*/
1538void
1539__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1540{
1541 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1542
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001543#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001544 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001545 ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
Jonas Hahnfelde46a4942016-03-24 12:52:20 +00001546 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1547 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001548 ompt_callbacks.ompt_callback(ompt_event_loop_end)(
Jonas Hahnfelde46a4942016-03-24 12:52:20 +00001549 team_info->parallel_id, task_info->task_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001550 }
1551#endif
1552
Jim Cownie5e8470a2013-09-27 10:38:44 +00001553 if ( __kmp_env_consistency_check )
1554 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1555}
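
/*
Illustrative sketch (hedged: the init entry point lives in the companion
scheduling code, and its exact signature and the kmp_sch_static enum value
are assumed here): a statically scheduled loop is set up with
__kmpc_for_static_init_4 and closed with __kmpc_for_static_fini.
@code
void lowered_static_loop(ident_t *loc, kmp_int32 gtid) {
    kmp_int32 lower = 0, upper = 99, stride = 1, last = 0;
    __kmpc_for_static_init_4(loc, gtid, 34, // 34 == kmp_sch_static (assumed)
                             &last, &lower, &upper, &stride, 1, 0);
    for (kmp_int32 i = lower; i <= upper; ++i) {
        // ... loop body over this thread's chunk ...
    }
    __kmpc_for_static_fini(loc, gtid);
}
@endcode
*/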
1556
1557/*
1558 * User routines which take C-style arguments (call by value)
1559 * different from the Fortran equivalent routines
1560 */
1561
1562void
1563ompc_set_num_threads( int arg )
1564{
1565// !!!!! TODO: check the per-task binding
1566 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1567}
1568
1569void
1570ompc_set_dynamic( int flag )
1571{
1572 kmp_info_t *thread;
1573
1574 /* For the thread-private implementation of the internal controls */
1575 thread = __kmp_entry_thread();
1576
1577 __kmp_save_internal_controls( thread );
1578
1579 set__dynamic( thread, flag ? TRUE : FALSE );
1580}
1581
1582void
1583ompc_set_nested( int flag )
1584{
1585 kmp_info_t *thread;
1586
1587 /* For the thread-private internal controls implementation */
1588 thread = __kmp_entry_thread();
1589
1590 __kmp_save_internal_controls( thread );
1591
1592 set__nested( thread, flag ? TRUE : FALSE );
1593}
1594
Jim Cownie5e8470a2013-09-27 10:38:44 +00001595void
1596ompc_set_max_active_levels( int max_active_levels )
1597{
1598 /* TO DO */
1599 /* we want per-task implementation of this internal control */
1600
1601 /* For the per-thread internal controls implementation */
1602 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1603}
1604
1605void
1606ompc_set_schedule( omp_sched_t kind, int modifier )
1607{
1608// !!!!! TODO: check the per-task binding
1609 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1610}
1611
1612int
1613ompc_get_ancestor_thread_num( int level )
1614{
1615 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1616}
1617
1618int
1619ompc_get_team_size( int level )
1620{
1621 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1622}
1623
Jim Cownie5e8470a2013-09-27 10:38:44 +00001624void
1625kmpc_set_stacksize( int arg )
1626{
1627 // __kmp_aux_set_stacksize initializes the library if needed
1628 __kmp_aux_set_stacksize( arg );
1629}
1630
1631void
1632kmpc_set_stacksize_s( size_t arg )
1633{
1634 // __kmp_aux_set_stacksize initializes the library if needed
1635 __kmp_aux_set_stacksize( arg );
1636}
1637
1638void
1639kmpc_set_blocktime( int arg )
1640{
1641 int gtid, tid;
1642 kmp_info_t *thread;
1643
1644 gtid = __kmp_entry_gtid();
1645 tid = __kmp_tid_from_gtid(gtid);
1646 thread = __kmp_thread_from_gtid(gtid);
1647
1648 __kmp_aux_set_blocktime( arg, thread, tid );
1649}
1650
1651void
1652kmpc_set_library( int arg )
1653{
1654 // __kmp_user_set_library initializes the library if needed
1655 __kmp_user_set_library( (enum library_type)arg );
1656}
1657
1658void
1659kmpc_set_defaults( char const * str )
1660{
1661 // __kmp_aux_set_defaults initializes the library if needed
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001662 __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001663}
1664
Jonathan Peyton067325f2016-05-31 19:01:15 +00001665void
1666kmpc_set_disp_num_buffers( int arg )
1667{
1668 // ignore after initialization because some teams have already
1669 // allocated dispatch buffers
1670 if( __kmp_init_serial == 0 && arg > 0 )
1671 __kmp_dispatch_num_buffers = arg;
1672}
1673
Jim Cownie5e8470a2013-09-27 10:38:44 +00001674int
1675kmpc_set_affinity_mask_proc( int proc, void **mask )
1676{
Alp Toker98758b02014-03-02 04:12:06 +00001677#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001678 return -1;
1679#else
1680 if ( ! TCR_4(__kmp_init_middle) ) {
1681 __kmp_middle_initialize();
1682 }
1683 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1684#endif
1685}
1686
1687int
1688kmpc_unset_affinity_mask_proc( int proc, void **mask )
1689{
Alp Toker98758b02014-03-02 04:12:06 +00001690#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001691 return -1;
1692#else
1693 if ( ! TCR_4(__kmp_init_middle) ) {
1694 __kmp_middle_initialize();
1695 }
1696 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1697#endif
1698}
1699
1700int
1701kmpc_get_affinity_mask_proc( int proc, void **mask )
1702{
Alp Toker98758b02014-03-02 04:12:06 +00001703#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001704 return -1;
1705#else
1706 if ( ! TCR_4(__kmp_init_middle) ) {
1707 __kmp_middle_initialize();
1708 }
1709 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1710#endif
1711}
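
/*
Illustrative usage sketch (hedged): these entry points back the Intel affinity
API exported from omp.h; a typical sequence pins the calling thread to one
logical processor. The zero-on-success return convention is assumed here.
@code
#include <omp.h>

void pin_to_proc(int proc) {
    kmp_affinity_mask_t mask;
    kmp_create_affinity_mask(&mask);
    if (kmp_set_affinity_mask_proc(proc, &mask) == 0) // 0 assumed to mean success
        kmp_set_affinity(&mask);
    kmp_destroy_affinity_mask(&mask);
}
@endcode
*/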
1712
Jim Cownie5e8470a2013-09-27 10:38:44 +00001713
1714/* -------------------------------------------------------------------------- */
1715/*!
1716@ingroup THREADPRIVATE
1717@param loc source location information
1718@param gtid global thread number
1719@param cpy_size size of the cpy_data buffer
1720@param cpy_data pointer to data to be copied
1721@param cpy_func helper function to call for copying data
1722@param didit flag variable: 1=single thread; 0=not single thread
1723
1724__kmpc_copyprivate implements the interface for the private data broadcast needed for
1725the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1726All threads participating in the parallel region call this routine.
1727One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1728and all other threads should have that variable set to 0.
1729All threads pass a pointer to a data buffer (cpy_data) that they have built.
1730
1731The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1732clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1733race conditions, so the code generation for the single region should avoid generating a barrier
1734after the call to @ref __kmpc_copyprivate.
1735
1736The <tt>gtid</tt> parameter is the global thread id for the current thread.
1737The <tt>loc</tt> parameter is a pointer to source location information.
1738
1739Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1740to a team-private location, then the other threads will each call the function pointed to by
1741the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer.
1742
1743The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1744and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1745the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1746to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1747
1748The interface to cpy_func is as follows:
1749@code
1750void cpy_func( void *destination, void *source )
1751@endcode
1752where void *destination is the cpy_data pointer for the thread being copied to
1753and void *source is the cpy_data pointer for the thread being copied from.
1754*/
1755void
1756__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1757{
1758 void **data_ptr;
1759
1760 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1761
1762 KMP_MB();
1763
1764 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1765
1766 if ( __kmp_env_consistency_check ) {
1767 if ( loc == 0 ) {
1768 KMP_WARNING( ConstructIdentInvalid );
1769 }
1770 }
1771
1772 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1773
1774 if (didit) *data_ptr = cpy_data;
1775
1776 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001777#if USE_ITT_NOTIFY
1778 __kmp_threads[gtid]->th.th_ident = loc;
1779#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001780 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1781
1782 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1783
1784 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1785 /* Nesting checks are already handled by the single construct checks */
1786
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001787#if USE_ITT_NOTIFY
1788 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1789#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001790 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1791}
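
/*
Illustrative sketch (assumed, following the interface described above): a
helper that copies one scalar, plus the broadcast call every thread makes
after the single region; didit is 1 only on the thread that executed it.
@code
static void copy_int(void *dst, void *src) {
    *(int *)dst = *(int *)src;
}

void broadcast_single_result(ident_t *loc, kmp_int32 gtid, int *priv, kmp_int32 didit) {
    __kmpc_copyprivate(loc, gtid, sizeof(int), priv, copy_int, didit);
    // on return, every thread's *priv holds the single thread's value
}
@endcode
*/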
1792
1793/* -------------------------------------------------------------------------- */
1794
1795#define INIT_LOCK __kmp_init_user_lock_with_checks
1796#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1797#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1798#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1799#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1800#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1801#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1802#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1803#define TEST_LOCK __kmp_test_user_lock_with_checks
1804#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1805#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1806#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1807
1808
1809/*
1810 * TODO: Make check abort messages use location info & pass it
1811 * into with_checks routines
1812 */
1813
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001814#if KMP_USE_DYNAMIC_LOCK
1815
1816// internal lock initializer
1817static __forceinline void
1818__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1819{
1820 if (KMP_IS_D_LOCK(seq)) {
1821 KMP_INIT_D_LOCK(lock, seq);
1822#if USE_ITT_BUILD
1823 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
1824#endif
1825 } else {
1826 KMP_INIT_I_LOCK(lock, seq);
1827#if USE_ITT_BUILD
1828 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1829 __kmp_itt_lock_creating(ilk->lock, loc);
1830#endif
1831 }
1832}
1833
1834// internal nest lock initializer
1835static __forceinline void
1836__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1837{
1838#if KMP_USE_TSX
1839 // Don't have nested lock implementation for speculative locks
1840 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
1841 seq = __kmp_user_lock_seq;
1842#endif
1843 switch (seq) {
1844 case lockseq_tas:
1845 seq = lockseq_nested_tas;
1846 break;
1847#if KMP_USE_FUTEX
1848 case lockseq_futex:
1849 seq = lockseq_nested_futex;
1850 break;
1851#endif
1852 case lockseq_ticket:
1853 seq = lockseq_nested_ticket;
1854 break;
1855 case lockseq_queuing:
1856 seq = lockseq_nested_queuing;
1857 break;
1858 case lockseq_drdpa:
1859 seq = lockseq_nested_drdpa;
1860 break;
1861 default:
1862 seq = lockseq_nested_queuing;
1863 }
1864 KMP_INIT_I_LOCK(lock, seq);
1865#if USE_ITT_BUILD
1866 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1867 __kmp_itt_lock_creating(ilk->lock, loc);
1868#endif
1869}
1870
1871/* initialize the lock with a hint */
1872void
1873__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1874{
1875 KMP_DEBUG_ASSERT(__kmp_init_serial);
1876 if (__kmp_env_consistency_check && user_lock == NULL) {
1877 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
1878 }
1879
1880 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1881}
1882
1883/* initialize the lock with a hint */
1884void
1885__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1886{
1887 KMP_DEBUG_ASSERT(__kmp_init_serial);
1888 if (__kmp_env_consistency_check && user_lock == NULL) {
1889 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
1890 }
1891
1892 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1893}
1894
1895#endif // KMP_USE_DYNAMIC_LOCK
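
/*
Illustrative usage sketch (hedged): with dynamic locks enabled these entries
implement omp_init_lock_with_hint() from OpenMP 4.5; the hint constants come
from omp.h.
@code
#include <omp.h>

omp_lock_t lk;

void init_hinted_lock(void) {
    omp_init_lock_with_hint(&lk, omp_lock_hint_speculative);
    // the lock is then used through the ordinary omp_set_lock/omp_unset_lock API
}
@endcode
*/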
1896
Jim Cownie5e8470a2013-09-27 10:38:44 +00001897/* initialize the lock */
1898void
1899__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001900#if KMP_USE_DYNAMIC_LOCK
1901 KMP_DEBUG_ASSERT(__kmp_init_serial);
1902 if (__kmp_env_consistency_check && user_lock == NULL) {
1903 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1904 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001905 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001906
1907#else // KMP_USE_DYNAMIC_LOCK
1908
Jim Cownie5e8470a2013-09-27 10:38:44 +00001909 static char const * const func = "omp_init_lock";
1910 kmp_user_lock_p lck;
1911 KMP_DEBUG_ASSERT( __kmp_init_serial );
1912
1913 if ( __kmp_env_consistency_check ) {
1914 if ( user_lock == NULL ) {
1915 KMP_FATAL( LockIsUninitialized, func );
1916 }
1917 }
1918
1919 KMP_CHECK_USER_LOCK_INIT();
1920
1921 if ( ( __kmp_user_lock_kind == lk_tas )
1922 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1923 lck = (kmp_user_lock_p)user_lock;
1924 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001925#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001926 else if ( ( __kmp_user_lock_kind == lk_futex )
1927 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1928 lck = (kmp_user_lock_p)user_lock;
1929 }
1930#endif
1931 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001932 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933 }
1934 INIT_LOCK( lck );
1935 __kmp_set_user_lock_location( lck, loc );
1936
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001937#if OMPT_SUPPORT && OMPT_TRACE
1938 if (ompt_enabled &&
1939 ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
1940 ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
1941 }
1942#endif
1943
Jim Cownie5e8470a2013-09-27 10:38:44 +00001944#if USE_ITT_BUILD
1945 __kmp_itt_lock_creating( lck );
1946#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001947
1948#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949} // __kmpc_init_lock
1950
1951/* initialize the lock */
1952void
1953__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001954#if KMP_USE_DYNAMIC_LOCK
1955
1956 KMP_DEBUG_ASSERT(__kmp_init_serial);
1957 if (__kmp_env_consistency_check && user_lock == NULL) {
1958 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1959 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001960 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001961
1962#else // KMP_USE_DYNAMIC_LOCK
1963
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 static char const * const func = "omp_init_nest_lock";
1965 kmp_user_lock_p lck;
1966 KMP_DEBUG_ASSERT( __kmp_init_serial );
1967
1968 if ( __kmp_env_consistency_check ) {
1969 if ( user_lock == NULL ) {
1970 KMP_FATAL( LockIsUninitialized, func );
1971 }
1972 }
1973
1974 KMP_CHECK_USER_LOCK_INIT();
1975
1976 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1977 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1978 lck = (kmp_user_lock_p)user_lock;
1979 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001980#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001981 else if ( ( __kmp_user_lock_kind == lk_futex )
1982 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1983 <= OMP_NEST_LOCK_T_SIZE ) ) {
1984 lck = (kmp_user_lock_p)user_lock;
1985 }
1986#endif
1987 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001988 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001989 }
1990
1991 INIT_NESTED_LOCK( lck );
1992 __kmp_set_user_lock_location( lck, loc );
1993
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001994#if OMPT_SUPPORT && OMPT_TRACE
1995 if (ompt_enabled &&
1996 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
1997 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
1998 }
1999#endif
2000
Jim Cownie5e8470a2013-09-27 10:38:44 +00002001#if USE_ITT_BUILD
2002 __kmp_itt_lock_creating( lck );
2003#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002004
2005#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006} // __kmpc_init_nest_lock
2007
2008void
2009__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002010#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002012# if USE_ITT_BUILD
2013 kmp_user_lock_p lck;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002014 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2015 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002016 } else {
2017 lck = (kmp_user_lock_p)user_lock;
2018 }
2019 __kmp_itt_lock_destroyed(lck);
2020# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002021 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002022#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002023 kmp_user_lock_p lck;
2024
2025 if ( ( __kmp_user_lock_kind == lk_tas )
2026 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2027 lck = (kmp_user_lock_p)user_lock;
2028 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002029#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030 else if ( ( __kmp_user_lock_kind == lk_futex )
2031 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2032 lck = (kmp_user_lock_p)user_lock;
2033 }
2034#endif
2035 else {
2036 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
2037 }
2038
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002039#if OMPT_SUPPORT && OMPT_TRACE
2040 if (ompt_enabled &&
2041 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
2042 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
2043 }
2044#endif
2045
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046#if USE_ITT_BUILD
2047 __kmp_itt_lock_destroyed( lck );
2048#endif /* USE_ITT_BUILD */
2049 DESTROY_LOCK( lck );
2050
2051 if ( ( __kmp_user_lock_kind == lk_tas )
2052 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2053 ;
2054 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002055#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056 else if ( ( __kmp_user_lock_kind == lk_futex )
2057 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2058 ;
2059 }
2060#endif
2061 else {
2062 __kmp_user_lock_free( user_lock, gtid, lck );
2063 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002064#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002065} // __kmpc_destroy_lock
2066
2067/* destroy the lock */
2068void
2069__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002070#if KMP_USE_DYNAMIC_LOCK
2071
2072# if USE_ITT_BUILD
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002073 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002074 __kmp_itt_lock_destroyed(ilk->lock);
2075# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002076 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002077
2078#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002079
2080 kmp_user_lock_p lck;
2081
2082 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2083 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2084 lck = (kmp_user_lock_p)user_lock;
2085 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002086#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002087 else if ( ( __kmp_user_lock_kind == lk_futex )
2088 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2089 <= OMP_NEST_LOCK_T_SIZE ) ) {
2090 lck = (kmp_user_lock_p)user_lock;
2091 }
2092#endif
2093 else {
2094 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
2095 }
2096
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002097#if OMPT_SUPPORT && OMPT_TRACE
2098 if (ompt_enabled &&
2099 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
2100 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
2101 }
2102#endif
2103
Jim Cownie5e8470a2013-09-27 10:38:44 +00002104#if USE_ITT_BUILD
2105 __kmp_itt_lock_destroyed( lck );
2106#endif /* USE_ITT_BUILD */
2107
2108 DESTROY_NESTED_LOCK( lck );
2109
2110 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2111 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2112 ;
2113 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002114#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002115 else if ( ( __kmp_user_lock_kind == lk_futex )
2116 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2117 <= OMP_NEST_LOCK_T_SIZE ) ) {
2118 ;
2119 }
2120#endif
2121 else {
2122 __kmp_user_lock_free( user_lock, gtid, lck );
2123 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002124#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002125} // __kmpc_destroy_nest_lock
2126
2127void
2128__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002129 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002130#if KMP_USE_DYNAMIC_LOCK
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002131 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002132# if USE_ITT_BUILD
2133 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // the itt function resolves the correct lock object internally.
2134# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002135# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002136 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002137 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002138 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002139# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002140 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002141 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002142 } else
2143# endif
2144 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002145 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002146 }
2147# if USE_ITT_BUILD
2148 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2149# endif
2150
2151#else // KMP_USE_DYNAMIC_LOCK
2152
Jim Cownie5e8470a2013-09-27 10:38:44 +00002153 kmp_user_lock_p lck;
2154
2155 if ( ( __kmp_user_lock_kind == lk_tas )
2156 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2157 lck = (kmp_user_lock_p)user_lock;
2158 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002159#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002160 else if ( ( __kmp_user_lock_kind == lk_futex )
2161 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2162 lck = (kmp_user_lock_p)user_lock;
2163 }
2164#endif
2165 else {
2166 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
2167 }
2168
2169#if USE_ITT_BUILD
2170 __kmp_itt_lock_acquiring( lck );
2171#endif /* USE_ITT_BUILD */
2172
2173 ACQUIRE_LOCK( lck, gtid );
2174
2175#if USE_ITT_BUILD
2176 __kmp_itt_lock_acquired( lck );
2177#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002178
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002179#if OMPT_SUPPORT && OMPT_TRACE
2180 if (ompt_enabled &&
2181 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
2182 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
2183 }
2184#endif
2185
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002186#endif // KMP_USE_DYNAMIC_LOCK
2187}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002188
2189void
2190__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002191#if KMP_USE_DYNAMIC_LOCK
2192
2193# if USE_ITT_BUILD
2194 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2195# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002196 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002197# if USE_ITT_BUILD
2198 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2199#endif
2200
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002201#if OMPT_SUPPORT && OMPT_TRACE
2202 if (ompt_enabled) {
2203 // missing support here: need to know whether acquired first or not
2204 }
2205#endif
2206
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002207#else // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002208 int acquire_status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002209 kmp_user_lock_p lck;
2210
2211 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2212 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2213 lck = (kmp_user_lock_p)user_lock;
2214 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002215#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002216 else if ( ( __kmp_user_lock_kind == lk_futex )
2217 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2218 <= OMP_NEST_LOCK_T_SIZE ) ) {
2219 lck = (kmp_user_lock_p)user_lock;
2220 }
2221#endif
2222 else {
2223 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2224 }
2225
2226#if USE_ITT_BUILD
2227 __kmp_itt_lock_acquiring( lck );
2228#endif /* USE_ITT_BUILD */
2229
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002230 ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002231
2232#if USE_ITT_BUILD
2233 __kmp_itt_lock_acquired( lck );
2234#endif /* USE_ITT_BUILD */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002235
2236#if OMPT_SUPPORT && OMPT_TRACE
2237 if (ompt_enabled) {
2238 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2239 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
2240 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
2241 } else {
2242 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
2243 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
2244 }
2245 }
2246#endif
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002247
2248#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002249}
2250
2251void
2252__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2253{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002254#if KMP_USE_DYNAMIC_LOCK
2255
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002256 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002257# if USE_ITT_BUILD
2258 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2259# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002260# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002261 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002262 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002263 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002264# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002265 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002266 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002267 } else
2268# endif
2269 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002270 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002271 }
2272
2273#else // KMP_USE_DYNAMIC_LOCK
2274
Jim Cownie5e8470a2013-09-27 10:38:44 +00002275 kmp_user_lock_p lck;
2276
2277 /* Can't use serial interval since not block structured */
2278 /* release the lock */
2279
2280 if ( ( __kmp_user_lock_kind == lk_tas )
2281 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002282#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002283 // "fast" path implemented to fix customer performance issue
2284#if USE_ITT_BUILD
2285 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2286#endif /* USE_ITT_BUILD */
2287 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2288 KMP_MB();
2289 return;
2290#else
2291 lck = (kmp_user_lock_p)user_lock;
2292#endif
2293 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002294#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002295 else if ( ( __kmp_user_lock_kind == lk_futex )
2296 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2297 lck = (kmp_user_lock_p)user_lock;
2298 }
2299#endif
2300 else {
2301 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2302 }
2303
2304#if USE_ITT_BUILD
2305 __kmp_itt_lock_releasing( lck );
2306#endif /* USE_ITT_BUILD */
2307
2308 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002309
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002310#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002311 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002312 ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2313 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2314 }
2315#endif
2316
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002317#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002318}
2319
2320/* release the lock */
2321void
2322__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2323{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002324#if KMP_USE_DYNAMIC_LOCK
2325
2326# if USE_ITT_BUILD
2327 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2328# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002329 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002330
2331#else // KMP_USE_DYNAMIC_LOCK
2332
Jim Cownie5e8470a2013-09-27 10:38:44 +00002333 kmp_user_lock_p lck;
2334
2335 /* Can't use serial interval since not block structured */
2336
2337 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2338 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002339#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002340 // "fast" path implemented to fix customer performance issue
2341 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2342#if USE_ITT_BUILD
2343 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2344#endif /* USE_ITT_BUILD */
2345 if ( --(tl->lk.depth_locked) == 0 ) {
2346 TCW_4(tl->lk.poll, 0);
2347 }
2348 KMP_MB();
2349 return;
2350#else
2351 lck = (kmp_user_lock_p)user_lock;
2352#endif
2353 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002354#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002355 else if ( ( __kmp_user_lock_kind == lk_futex )
2356 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2357 <= OMP_NEST_LOCK_T_SIZE ) ) {
2358 lck = (kmp_user_lock_p)user_lock;
2359 }
2360#endif
2361 else {
2362 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2363 }
2364
2365#if USE_ITT_BUILD
2366 __kmp_itt_lock_releasing( lck );
2367#endif /* USE_ITT_BUILD */
2368
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002369 int release_status;
2370 release_status = RELEASE_NESTED_LOCK( lck, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002371#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002372 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002373 if (release_status == KMP_LOCK_RELEASED) {
2374 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2375 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2376 (uint64_t) lck);
2377 }
2378 } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2379 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2380 (uint64_t) lck);
2381 }
2382 }
2383#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002384
2385#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002386}
2387
2388/* try to acquire the lock */
2389int
2390__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2391{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002392 KMP_COUNT_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002393
2394#if KMP_USE_DYNAMIC_LOCK
2395 int rc;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002396 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002397# if USE_ITT_BUILD
Jonathan Peyton81f9cd12015-05-22 22:37:22 +00002398 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002399# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002400# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002401 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002402 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002403 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002404# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002405 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002406 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002407 } else
2408# endif
2409 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002410 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002411 }
2412 if (rc) {
2413# if USE_ITT_BUILD
2414 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2415# endif
2416 return FTN_TRUE;
2417 } else {
2418# if USE_ITT_BUILD
2419 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2420# endif
2421 return FTN_FALSE;
2422 }
2423
2424#else // KMP_USE_DYNAMIC_LOCK
2425
Jim Cownie5e8470a2013-09-27 10:38:44 +00002426 kmp_user_lock_p lck;
2427 int rc;
2428
2429 if ( ( __kmp_user_lock_kind == lk_tas )
2430 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2431 lck = (kmp_user_lock_p)user_lock;
2432 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002433#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002434 else if ( ( __kmp_user_lock_kind == lk_futex )
2435 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2436 lck = (kmp_user_lock_p)user_lock;
2437 }
2438#endif
2439 else {
2440 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2441 }
2442
2443#if USE_ITT_BUILD
2444 __kmp_itt_lock_acquiring( lck );
2445#endif /* USE_ITT_BUILD */
2446
2447 rc = TEST_LOCK( lck, gtid );
2448#if USE_ITT_BUILD
2449 if ( rc ) {
2450 __kmp_itt_lock_acquired( lck );
2451 } else {
2452 __kmp_itt_lock_cancelled( lck );
2453 }
2454#endif /* USE_ITT_BUILD */
2455 return ( rc ? FTN_TRUE : FTN_FALSE );
2456
2457 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002458
2459#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002460}
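
/*
Illustrative sketch (assumed): the call sequence behind omp_set_lock /
omp_test_lock / omp_unset_lock when routed through these kmpc entry points;
user_lock stands for the address of an omp_lock_t object.
@code
void locked_increment(ident_t *loc, kmp_int32 gtid, void **user_lock, int *counter) {
    if (!__kmpc_test_lock(loc, gtid, user_lock)) { // try once, then block
        __kmpc_set_lock(loc, gtid, user_lock);
    }
    ++*counter;                                    // protected update
    __kmpc_unset_lock(loc, gtid, user_lock);
}
@endcode
*/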
2461
2462/* try to acquire the lock */
2463int
2464__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2465{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002466#if KMP_USE_DYNAMIC_LOCK
2467 int rc;
2468# if USE_ITT_BUILD
2469 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2470# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002471 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002472# if USE_ITT_BUILD
2473 if (rc) {
2474 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2475 } else {
2476 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2477 }
2478# endif
2479 return rc;
2480
2481#else // KMP_USE_DYNAMIC_LOCK
2482
Jim Cownie5e8470a2013-09-27 10:38:44 +00002483 kmp_user_lock_p lck;
2484 int rc;
2485
2486 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2487 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2488 lck = (kmp_user_lock_p)user_lock;
2489 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002490#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002491 else if ( ( __kmp_user_lock_kind == lk_futex )
2492 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2493 <= OMP_NEST_LOCK_T_SIZE ) ) {
2494 lck = (kmp_user_lock_p)user_lock;
2495 }
2496#endif
2497 else {
2498 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2499 }
2500
2501#if USE_ITT_BUILD
2502 __kmp_itt_lock_acquiring( lck );
2503#endif /* USE_ITT_BUILD */
2504
2505 rc = TEST_NESTED_LOCK( lck, gtid );
2506#if USE_ITT_BUILD
2507 if ( rc ) {
2508 __kmp_itt_lock_acquired( lck );
2509 } else {
2510 __kmp_itt_lock_cancelled( lck );
2511 }
2512#endif /* USE_ITT_BUILD */
2513 return rc;
2514
2515 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002516
2517#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002518}
2519
2520
2521/*--------------------------------------------------------------------------------------------------------------------*/
2522
2523/*
2524 * Interface to fast scalable reduce methods routines
2525 */
2526
2527// keep the selected method in a thread-local structure for cross-function usage: it will be used in the __kmpc_end_reduce* functions;
2528// an alternative would be to re-determine the method in the __kmpc_end_reduce* functions (a new prototype would be required then)
2529// AT: which solution is better?
2530#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2531 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2532
2533#define __KMP_GET_REDUCTION_METHOD(gtid) \
2534 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2535
2536// description of the packed_reduction_method variable: look at the macros in kmp.h
2537
2538
2539// used in a critical section reduce block
2540static __forceinline void
2541__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2542
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002543 // in the previous implementation this lock was visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002544 // as a serial overhead span (although it is used for an internal purpose only)
2545 // it is unclear why it was visible there;
2546 // should we keep it visible in the new reduce block?
2547 kmp_user_lock_p lck;
2548
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002549#if KMP_USE_DYNAMIC_LOCK
2550
Jonathan Peytondae13d82015-12-11 21:57:06 +00002551 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
2552 // Check if it is initialized.
2553 if (*lk == 0) {
2554 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
2555 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
2556 } else {
2557 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002558 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002559 }
2560 // Branch for accessing the actual lock object and set operation. This branching is inevitable since
2561 // this lock initialization does not follow the normal dispatch path (lock table is not used).
2562 if (KMP_EXTRACT_D_TAG(lk) != 0) {
2563 lck = (kmp_user_lock_p)lk;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002564 KMP_DEBUG_ASSERT(lck != NULL);
2565 if (__kmp_env_consistency_check) {
2566 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2567 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002568 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002569 } else {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002570 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
2571 lck = ilk->lock;
2572 KMP_DEBUG_ASSERT(lck != NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002573 if (__kmp_env_consistency_check) {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002574 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002575 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002576 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002577 }
2578
2579#else // KMP_USE_DYNAMIC_LOCK
2580
Jim Cownie5e8470a2013-09-27 10:38:44 +00002581 // We know that the fast reduction code is only emitted by Intel compilers
2582 // with 32 byte critical sections. If there isn't enough space, then we
2583 // have to use a pointer.
2584 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2585 lck = (kmp_user_lock_p)crit;
2586 }
2587 else {
2588 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2589 }
2590 KMP_DEBUG_ASSERT( lck != NULL );
2591
2592 if ( __kmp_env_consistency_check )
2593 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2594
2595 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002596
2597#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002598}
2599
2600// used in a critical section reduce block
2601static __forceinline void
2602__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2603
2604 kmp_user_lock_p lck;
2605
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002606#if KMP_USE_DYNAMIC_LOCK
2607
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002608 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002609 lck = (kmp_user_lock_p)crit;
2610 if (__kmp_env_consistency_check)
2611 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002612 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002613 } else {
2614 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2615 if (__kmp_env_consistency_check)
2616 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002617 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002618 }
2619
2620#else // KMP_USE_DYNAMIC_LOCK
2621
Jim Cownie5e8470a2013-09-27 10:38:44 +00002622 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2623 // sections. If there isn't enough space, then we have to use a pointer.
2624 if ( __kmp_base_user_lock_size > 32 ) {
2625 lck = *( (kmp_user_lock_p *) crit );
2626 KMP_ASSERT( lck != NULL );
2627 } else {
2628 lck = (kmp_user_lock_p) crit;
2629 }
2630
2631 if ( __kmp_env_consistency_check )
2632 __kmp_pop_sync( global_tid, ct_critical, loc );
2633
2634 __kmp_release_user_lock_with_checks( lck, global_tid );
2635
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002636#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637} // __kmp_end_critical_section_reduce_block
2638
2639
2640/* 2.a.i. Reduce Block without a terminating barrier */
2641/*!
2642@ingroup SYNCHRONIZATION
2643@param loc source location information
2644@param global_tid global thread number
2645@param num_vars number of items (variables) to be reduced
2646@param reduce_size size of data in bytes to be reduced
2647@param reduce_data pointer to data to be reduced
2648@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2649@param lck pointer to the unique lock data structure
2650@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2651
2652The nowait version is used for a reduce clause with the nowait argument.
2653*/
2654kmp_int32
2655__kmpc_reduce_nowait(
2656 ident_t *loc, kmp_int32 global_tid,
2657 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2658 kmp_critical_name *lck ) {
2659
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002660 KMP_COUNT_BLOCK(REDUCE_nowait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002661 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002662 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002663#if OMP_40_ENABLED
2664 kmp_team_t *team;
2665 kmp_info_t *th;
2666 int teams_swapped = 0, task_state;
2667#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002668 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2669
2670 // why do we need this initialization here at all?
2671 // The reduction clause cannot be used as a stand-alone directive.
2672
2673 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2674 // note: the thread checker may report a false-positive race here
2675 if( ! TCR_4( __kmp_init_parallel ) )
2676 __kmp_parallel_initialize();
2677
2678 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002679#if KMP_USE_DYNAMIC_LOCK
2680 if ( __kmp_env_consistency_check )
2681 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2682#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002683 if ( __kmp_env_consistency_check )
2684 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002685#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002686
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002687#if OMP_40_ENABLED
2688 th = __kmp_thread_from_gtid(global_tid);
2689 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2690 team = th->th.th_team;
2691 if( team->t.t_level == th->th.th_teams_level ) {
2692 // this is reduction at teams construct
2693 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2694 // Let's swap teams temporarily for the reduction barrier
2695 teams_swapped = 1;
2696 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2697 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002698 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002699 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002700 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002701 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002702 }
2703 }
2704#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002705
2706 // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable
2707 // the variable should be either a construct-specific or thread-specific property, not a team specific property
2708 // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct)
2709 // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
2710 // (if both construct-specific and team-specific variables were shared, then unnecessary extra syncs would be needed)
2711 // a thread-specific variable is better regarding two issues above (next construct and extra syncs)
2712 // a thread-specific "th_local.reduction_method" variable is used currently
2713 // each thread executes the 'determine' and 'set' lines (no need to restrict this to one thread; that would only add unnecessary extra syncs)
2714
2715 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2716 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2717
2718 if( packed_reduction_method == critical_reduce_block ) {
2719
2720 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2721 retval = 1;
2722
2723 } else if( packed_reduction_method == empty_reduce_block ) {
2724
2725 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2726 retval = 1;
2727
2728 } else if( packed_reduction_method == atomic_reduce_block ) {
2729
2730 retval = 2;
2731
2732 // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2733 // (it's not quite good, because the checking block has been closed by this 'pop',
2734 // but atomic operation has not been executed yet, will be executed slightly later, literally on next instruction)
2735 if ( __kmp_env_consistency_check )
2736 __kmp_pop_sync( global_tid, ct_reduce, loc );
2737
2738 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2739
2740 //AT: performance issue: a real barrier here
2741 //AT: (if the master is slow, the other threads are blocked here waiting for it to arrive and release them)
2742 //AT: (this is not what a customer would expect when specifying the NOWAIT clause)
2743 //AT: (specifying NOWAIT yields no performance improvement here, which may confuse a customer)
2744 //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
2745 // and be more in line with the sense of NOWAIT
2746 //AT: TO DO: run the EPCC benchmark and compare times
2747
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002748 // this barrier should be invisible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002749 // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002750#if USE_ITT_NOTIFY
2751 __kmp_threads[global_tid]->th.th_ident = loc;
2752#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002753 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2754 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2755
 2756 // all workers except the master should do this pop here
 2757 // ( none of the other workers will get to __kmpc_end_reduce_nowait() )
2758 if ( __kmp_env_consistency_check ) {
2759 if( retval == 0 ) {
2760 __kmp_pop_sync( global_tid, ct_reduce, loc );
2761 }
2762 }
2763
2764 } else {
2765
2766 // should never reach this block
2767 KMP_ASSERT( 0 ); // "unexpected method"
2768
2769 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002770#if OMP_40_ENABLED
2771 if( teams_swapped ) {
2772 // Restore thread structure
2773 th->th.th_info.ds.ds_tid = 0;
2774 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002775 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002776 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002777 th->th.th_task_state = task_state;
2778 }
2779#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002780 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2781
2782 return retval;
2783}
2784
2785/*!
2786@ingroup SYNCHRONIZATION
2787@param loc source location information
2788@param global_tid global thread id.
2789@param lck pointer to the unique lock data structure
2790
2791Finish the execution of a reduce nowait.
2792*/
2793void
2794__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2795
2796 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2797
2798 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2799
2800 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2801
2802 if( packed_reduction_method == critical_reduce_block ) {
2803
2804 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2805
2806 } else if( packed_reduction_method == empty_reduce_block ) {
2807
2808 // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2809
2810 } else if( packed_reduction_method == atomic_reduce_block ) {
2811
 2812 // neither the master nor the other workers should get here
 2813 // (the code gen does not generate this call in case 2: atomic reduce block)
 2814 // actually it would be better to remove this else-if entirely;
 2815 // after removal the value would be caught by the 'else' branch and would assert
2816
2817 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2818
2819 // only master gets here
2820
2821 } else {
2822
2823 // should never reach this block
2824 KMP_ASSERT( 0 ); // "unexpected method"
2825
2826 }
2827
2828 if ( __kmp_env_consistency_check )
2829 __kmp_pop_sync( global_tid, ct_reduce, loc );
2830
2831 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2832
2833 return;
2834}
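/*
 * Illustrative sketch (not part of this file): the call sequence a compiler
 * might emit for a nowait reduction such as "reduction(+:sum) nowait",
 * dispatching on the 0/1/2 result contract documented above. The names
 * sum, priv_sum, reduce_callback, crit_lock, and the use of
 * __kmpc_atomic_fixed4_add are assumptions; kept under "#if 0" because it
 * is an example, not runtime code.
 */
#if 0
static kmp_critical_name crit_lock;  // lock used if the critical method is chosen
static kmp_int32 sum;                // the shared reduction variable

static void reduce_callback(void *lhs, void *rhs) {
    *(kmp_int32 *)lhs += *(kmp_int32 *)rhs;  // combine two partial sums
}

static void reduce_tail(ident_t *loc, kmp_int32 gtid, kmp_int32 priv_sum) {
    switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(kmp_int32), &priv_sum,
                                 reduce_callback, &crit_lock)) {
    case 1:  // critical/empty method, or the thread holding the tree-reduced value
        sum += priv_sum;
        __kmpc_end_reduce_nowait(loc, gtid, &crit_lock);
        break;
    case 2:  // atomic method: every thread updates the shared variable atomically
        __kmpc_atomic_fixed4_add(loc, gtid, &sum, priv_sum);
        break;
    case 0:  // other workers: their value was already consumed by the tree reduce
        break;
    }
}
#endif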
2835
2836/* 2.a.ii. Reduce Block with a terminating barrier */
2837
2838/*!
2839@ingroup SYNCHRONIZATION
2840@param loc source location information
2841@param global_tid global thread number
2842@param num_vars number of items (variables) to be reduced
2843@param reduce_size size of data in bytes to be reduced
2844@param reduce_data pointer to data to be reduced
2845@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2846@param lck pointer to the unique lock data structure
2847@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2848
2849A blocking reduce that includes an implicit barrier.
2850*/
2851kmp_int32
2852__kmpc_reduce(
2853 ident_t *loc, kmp_int32 global_tid,
2854 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2855 void (*reduce_func)(void *lhs_data, void *rhs_data),
2856 kmp_critical_name *lck )
2857{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002858 KMP_COUNT_BLOCK(REDUCE_wait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002859 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002860 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2861
2862 KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2863
2864 // why do we need this initialization here at all?
 2865 // A reduction clause cannot be a stand-alone directive.
2866
 2867 // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
 2868 // possible false-positive race detection by the thread checker ???
2869 if( ! TCR_4( __kmp_init_parallel ) )
2870 __kmp_parallel_initialize();
2871
2872 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002873#if KMP_USE_DYNAMIC_LOCK
2874 if ( __kmp_env_consistency_check )
2875 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2876#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002877 if ( __kmp_env_consistency_check )
2878 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002879#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002880
Jim Cownie5e8470a2013-09-27 10:38:44 +00002881 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2882 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2883
2884 if( packed_reduction_method == critical_reduce_block ) {
2885
2886 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2887 retval = 1;
2888
2889 } else if( packed_reduction_method == empty_reduce_block ) {
2890
2891 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2892 retval = 1;
2893
2894 } else if( packed_reduction_method == atomic_reduce_block ) {
2895
2896 retval = 2;
2897
2898 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2899
2900 //case tree_reduce_block:
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002901 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002902 // (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002903#if USE_ITT_NOTIFY
2904 __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2905#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002906 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2907 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2908
 2909 // all workers except the master should do this pop here
 2910 // ( no worker other than the master will enter __kmpc_end_reduce() )
2911 if ( __kmp_env_consistency_check ) {
2912 if( retval == 0 ) { // 0: all other workers; 1: master
2913 __kmp_pop_sync( global_tid, ct_reduce, loc );
2914 }
2915 }
2916
2917 } else {
2918
2919 // should never reach this block
2920 KMP_ASSERT( 0 ); // "unexpected method"
2921
2922 }
2923
2924 KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2925
2926 return retval;
2927}
2928
2929/*!
2930@ingroup SYNCHRONIZATION
2931@param loc source location information
2932@param global_tid global thread id.
2933@param lck pointer to the unique lock data structure
2934
2935Finish the execution of a blocking reduce.
2936The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
2937*/
2938void
2939__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2940
2941 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2942
2943 KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2944
2945 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2946
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002947 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002948 // (it's a terminating barrier on constructs if NOWAIT not specified)
2949
2950 if( packed_reduction_method == critical_reduce_block ) {
2951
2952 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2953
2954 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002955#if USE_ITT_NOTIFY
2956 __kmp_threads[global_tid]->th.th_ident = loc;
2957#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002958 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2959
2960 } else if( packed_reduction_method == empty_reduce_block ) {
2961
2962 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2963
2964 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002965#if USE_ITT_NOTIFY
2966 __kmp_threads[global_tid]->th.th_ident = loc;
2967#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002968 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2969
2970 } else if( packed_reduction_method == atomic_reduce_block ) {
2971
2972 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002973#if USE_ITT_NOTIFY
2974 __kmp_threads[global_tid]->th.th_ident = loc;
2975#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002976 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2977
2978 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2979
2980 // only master executes here (master releases all other workers)
2981 __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2982
2983 } else {
2984
2985 // should never reach this block
2986 KMP_ASSERT( 0 ); // "unexpected method"
2987
2988 }
2989
2990 if ( __kmp_env_consistency_check )
2991 __kmp_pop_sync( global_tid, ct_reduce, loc );
2992
2993 KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2994
2995 return;
2996}
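/*
 * Illustrative sketch (assumption, not part of this file), reusing the names
 * from the nowait sketch above: for the blocking form the dispatch is similar,
 * except that cases 1 and 2 call __kmpc_end_reduce() so it can execute the
 * terminating plain barrier, while case 0 workers fall through (for the tree
 * method the barrier inside __kmpc_reduce() already blocked them until the
 * master's __kmpc_end_reduce() released the split barrier).
 */
#if 0
switch (__kmpc_reduce(loc, gtid, 1, sizeof(kmp_int32), &priv_sum,
                      reduce_callback, &crit_lock)) {
case 1:  // combine under the chosen method, then barrier in end_reduce
    sum += priv_sum;
    __kmpc_end_reduce(loc, gtid, &crit_lock);
    break;
case 2:  // atomic update, then barrier in end_reduce
    __kmpc_atomic_fixed4_add(loc, gtid, &sum, priv_sum);
    __kmpc_end_reduce(loc, gtid, &crit_lock);
    break;
case 0:  // tree-method workers: already released by the master
    break;
}
#endif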
2997
2998#undef __KMP_GET_REDUCTION_METHOD
2999#undef __KMP_SET_REDUCTION_METHOD
3000
3001/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
3002
3003kmp_uint64
3004__kmpc_get_taskid() {
3005
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003006 kmp_int32 gtid;
3007 kmp_info_t * thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003008
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003009 gtid = __kmp_get_gtid();
3010 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003011 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003012 } // if
3013 thread = __kmp_thread_from_gtid( gtid );
3014 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003015
3016} // __kmpc_get_taskid
3017
3018
3019kmp_uint64
3020__kmpc_get_parent_taskid() {
3021
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003022 kmp_int32 gtid;
3023 kmp_info_t * thread;
3024 kmp_taskdata_t * parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003025
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003026 gtid = __kmp_get_gtid();
3027 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003028 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003029 } // if
3030 thread = __kmp_thread_from_gtid( gtid );
3031 parent_task = thread->th.th_current_task->td_parent;
3032 return ( parent_task == NULL ? 0 : parent_task->td_task_id );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003033
3034} // __kmpc_get_parent_taskid
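/*
 * Illustrative sketch (assumption, not part of this file): the two entry
 * points above can be used from tracing or debugging code to correlate a
 * task with its parent; both return 0 when called outside the runtime.
 */
#if 0
#include <stdio.h>
static void trace_current_task(void) {
    kmp_uint64 id = __kmpc_get_taskid();
    kmp_uint64 parent = __kmpc_get_parent_taskid();
    printf("task %llu (parent %llu)\n",
           (unsigned long long)id, (unsigned long long)parent);
}
#endif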
3035
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003036void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003037{
Jim Cownie5e8470a2013-09-27 10:38:44 +00003038 if ( ! __kmp_init_serial ) {
3039 __kmp_serial_initialize();
3040 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003041 __kmp_place_num_sockets = nS;
3042 __kmp_place_socket_offset = sO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003043 __kmp_place_num_cores = nC;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003044 __kmp_place_core_offset = cO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003045 __kmp_place_num_threads_per_core = nT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003046}
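/*
 * Illustrative sketch (hypothetical call, not part of this file): restrict the
 * runtime to 4 cores per socket starting at core offset 2, with 2 threads per
 * core, leaving the socket count and offset unconstrained (0). This mirrors
 * what the KMP_PLACE_THREADS parser would store into the same __kmp_place_*
 * globals (the exact environment-string syntax is an assumption).
 */
#if 0
__kmpc_place_threads(0 /* nS */, 0 /* sO */, 4 /* nC */, 2 /* cO */, 2 /* nT */);
#endif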
3047
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003048#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00003049/*!
3050@ingroup WORK_SHARING
3051@param loc source location information.
3052@param gtid global thread number.
3053@param num_dims number of associated doacross loops.
3054@param dims info on loops bounds.
3055
3056Initialize doacross loop information.
 3057 Expect the compiler to send us inclusive bounds,
3058e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
3059*/
3060void
3061__kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, struct kmp_dim * dims)
3062{
3063 int j, idx;
3064 kmp_int64 last, trace_count;
3065 kmp_info_t *th = __kmp_threads[gtid];
3066 kmp_team_t *team = th->th.th_team;
3067 kmp_uint32 *flags;
3068 kmp_disp_t *pr_buf = th->th.th_dispatch;
3069 dispatch_shared_info_t *sh_buf;
3070
3071 KA_TRACE(20,("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3072 gtid, num_dims, !team->t.t_serialized));
3073 KMP_DEBUG_ASSERT(dims != NULL);
3074 KMP_DEBUG_ASSERT(num_dims > 0);
3075
3076 if( team->t.t_serialized ) {
3077 KA_TRACE(20,("__kmpc_doacross_init() exit: serialized team\n"));
3078 return; // no dependencies if team is serialized
3079 }
3080 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3081 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for the next loop
Jonathan Peyton067325f2016-05-31 19:01:15 +00003082 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003083
3084 // Save bounds info into allocated private buffer
3085 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3086 pr_buf->th_doacross_info =
3087 (kmp_int64*)__kmp_thread_malloc(th, sizeof(kmp_int64)*(4 * num_dims + 1));
3088 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3089 pr_buf->th_doacross_info[0] = (kmp_int64)num_dims; // first element is number of dimensions
 3090 // Also save the address of num_done so it can be accessed later without knowing the buffer index
3091 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3092 pr_buf->th_doacross_info[2] = dims[0].lo;
3093 pr_buf->th_doacross_info[3] = dims[0].up;
3094 pr_buf->th_doacross_info[4] = dims[0].st;
3095 last = 5;
3096 for( j = 1; j < num_dims; ++j ) {
3097 kmp_int64 range_length; // To keep ranges of all dimensions but the first dims[0]
3098 if( dims[j].st == 1 ) { // most common case
 3099 // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3100 range_length = dims[j].up - dims[j].lo + 1;
3101 } else {
3102 if( dims[j].st > 0 ) {
3103 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3104 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3105 } else { // negative increment
3106 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3107 range_length = (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3108 }
3109 }
3110 pr_buf->th_doacross_info[last++] = range_length;
3111 pr_buf->th_doacross_info[last++] = dims[j].lo;
3112 pr_buf->th_doacross_info[last++] = dims[j].up;
3113 pr_buf->th_doacross_info[last++] = dims[j].st;
3114 }
3115
3116 // Compute total trip count.
3117 // Start with range of dims[0] which we don't need to keep in the buffer.
3118 if( dims[0].st == 1 ) { // most common case
3119 trace_count = dims[0].up - dims[0].lo + 1;
3120 } else if( dims[0].st > 0 ) {
3121 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3122 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3123 } else { // negative increment
3124 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3125 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3126 }
3127 for( j = 1; j < num_dims; ++j ) {
3128 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3129 }
3130 KMP_DEBUG_ASSERT(trace_count > 0);
3131
Jonathan Peyton067325f2016-05-31 19:01:15 +00003132 // Check whether the shared buffer is still occupied by the previous loop (the one with index idx - __kmp_dispatch_num_buffers)
Jonathan Peyton71909c52016-03-02 22:42:06 +00003133 if( idx != sh_buf->doacross_buf_idx ) {
3134 // Shared buffer is occupied, wait for it to be free
3135 __kmp_wait_yield_4( (kmp_uint32*)&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, NULL );
3136 }
3137 // Check if we are the first thread. After the CAS the first thread gets 0,
 3138 // the others get 1 if initialization is in progress, or the allocated pointer otherwise.
3139 flags = (kmp_uint32*)KMP_COMPARE_AND_STORE_RET64(
3140 (kmp_int64*)&sh_buf->doacross_flags,NULL,(kmp_int64)1);
3141 if( flags == NULL ) {
3142 // we are the first thread, allocate the array of flags
3143 kmp_int64 size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3144 sh_buf->doacross_flags = (kmp_uint32*)__kmp_thread_calloc(th, size, 1);
3145 } else if( (kmp_int64)flags == 1 ) {
3146 // initialization is still in progress, need to wait
 3147 while( (kmp_int64)TCR_PTR(sh_buf->doacross_flags) == 1 ) { // volatile read until the allocating thread publishes the pointer
3148 KMP_YIELD(TRUE);
3149 }
3150 }
3151 KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > 1); // check value of pointer
3152 pr_buf->th_doacross_flags = sh_buf->doacross_flags; // save private copy in order to not
3153 // touch shared buffer on each iteration
3154 KA_TRACE(20,("__kmpc_doacross_init() exit: T#%d\n", gtid));
3155}
3156
3157void
3158__kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec)
3159{
3160 kmp_int32 shft, num_dims, i;
3161 kmp_uint32 flag;
3162 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3163 kmp_info_t *th = __kmp_threads[gtid];
3164 kmp_team_t *team = th->th.th_team;
3165 kmp_disp_t *pr_buf;
3166 kmp_int64 lo, up, st;
3167
3168 KA_TRACE(20,("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3169 if( team->t.t_serialized ) {
3170 KA_TRACE(20,("__kmpc_doacross_wait() exit: serialized team\n"));
3171 return; // no dependencies if team is serialized
3172 }
3173
3174 // calculate sequential iteration number and check out-of-bounds condition
3175 pr_buf = th->th.th_dispatch;
3176 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3177 num_dims = pr_buf->th_doacross_info[0];
3178 lo = pr_buf->th_doacross_info[2];
3179 up = pr_buf->th_doacross_info[3];
3180 st = pr_buf->th_doacross_info[4];
3181 if( st == 1 ) { // most common case
3182 if( vec[0] < lo || vec[0] > up ) {
3183 KA_TRACE(20,(
3184 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3185 gtid, vec[0], lo, up));
3186 return;
3187 }
3188 iter_number = vec[0] - lo;
3189 } else if( st > 0 ) {
3190 if( vec[0] < lo || vec[0] > up ) {
3191 KA_TRACE(20,(
3192 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3193 gtid, vec[0], lo, up));
3194 return;
3195 }
3196 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3197 } else { // negative increment
3198 if( vec[0] > lo || vec[0] < up ) {
3199 KA_TRACE(20,(
3200 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3201 gtid, vec[0], lo, up));
3202 return;
3203 }
3204 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3205 }
3206 for( i = 1; i < num_dims; ++i ) {
3207 kmp_int64 iter, ln;
3208 kmp_int32 j = i * 4;
3209 ln = pr_buf->th_doacross_info[j + 1];
3210 lo = pr_buf->th_doacross_info[j + 2];
3211 up = pr_buf->th_doacross_info[j + 3];
3212 st = pr_buf->th_doacross_info[j + 4];
3213 if( st == 1 ) {
3214 if( vec[i] < lo || vec[i] > up ) {
3215 KA_TRACE(20,(
3216 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3217 gtid, vec[i], lo, up));
3218 return;
3219 }
3220 iter = vec[i] - lo;
3221 } else if( st > 0 ) {
3222 if( vec[i] < lo || vec[i] > up ) {
3223 KA_TRACE(20,(
3224 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3225 gtid, vec[i], lo, up));
3226 return;
3227 }
3228 iter = (kmp_uint64)(vec[i] - lo) / st;
3229 } else { // st < 0
3230 if( vec[i] > lo || vec[i] < up ) {
3231 KA_TRACE(20,(
3232 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3233 gtid, vec[i], lo, up));
3234 return;
3235 }
3236 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3237 }
3238 iter_number = iter + ln * iter_number;
3239 }
3240 shft = iter_number % 32; // use 32-bit granularity
3241 iter_number >>= 5; // divided by 32
3242 flag = 1 << shft;
3243 while( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 ) {
3244 KMP_YIELD(TRUE);
3245 }
3246 KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
3247 gtid, (iter_number<<5)+shft));
3248}
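/*
 * Worked example of the flag indexing above (illustrative): linearized
 * iteration 100 gives shft = 100 % 32 = 4 and 100 >> 5 = 3, so the waiter
 * spins on bit 4 of th_doacross_flags[3]; the matching post below sets the
 * same bit with an atomic OR.
 */
#if 0
KMP_DEBUG_ASSERT((100 % 32) == 4 && (100 >> 5) == 3 && (1 << 4) == 0x10);
#endif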
3249
3250void
3251__kmpc_doacross_post(ident_t *loc, int gtid, long long *vec)
3252{
3253 kmp_int32 shft, num_dims, i;
3254 kmp_uint32 flag;
3255 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3256 kmp_info_t *th = __kmp_threads[gtid];
3257 kmp_team_t *team = th->th.th_team;
3258 kmp_disp_t *pr_buf;
3259 kmp_int64 lo, st;
3260
3261 KA_TRACE(20,("__kmpc_doacross_post() enter: called T#%d\n", gtid));
3262 if( team->t.t_serialized ) {
3263 KA_TRACE(20,("__kmpc_doacross_post() exit: serialized team\n"));
3264 return; // no dependencies if team is serialized
3265 }
3266
3267 // calculate sequential iteration number (same as in "wait" but no out-of-bounds checks)
3268 pr_buf = th->th.th_dispatch;
3269 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3270 num_dims = pr_buf->th_doacross_info[0];
3271 lo = pr_buf->th_doacross_info[2];
3272 st = pr_buf->th_doacross_info[4];
3273 if( st == 1 ) { // most common case
3274 iter_number = vec[0] - lo;
3275 } else if( st > 0 ) {
3276 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3277 } else { // negative increment
3278 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3279 }
3280 for( i = 1; i < num_dims; ++i ) {
3281 kmp_int64 iter, ln;
3282 kmp_int32 j = i * 4;
3283 ln = pr_buf->th_doacross_info[j + 1];
3284 lo = pr_buf->th_doacross_info[j + 2];
3285 st = pr_buf->th_doacross_info[j + 4];
3286 if( st == 1 ) {
3287 iter = vec[i] - lo;
3288 } else if( st > 0 ) {
3289 iter = (kmp_uint64)(vec[i] - lo) / st;
3290 } else { // st < 0
3291 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3292 }
3293 iter_number = iter + ln * iter_number;
3294 }
3295 shft = iter_number % 32; // use 32-bit granularity
3296 iter_number >>= 5; // divided by 32
3297 flag = 1 << shft;
3298 if( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 )
3299 KMP_TEST_THEN_OR32( (kmp_int32*)&pr_buf->th_doacross_flags[iter_number], (kmp_int32)flag );
3300 KA_TRACE(20,("__kmpc_doacross_post() exit: T#%d iter %lld posted\n",
3301 gtid, (iter_number<<5)+shft));
3302}
3303
3304void
3305__kmpc_doacross_fini(ident_t *loc, int gtid)
3306{
3307 kmp_int64 num_done;
3308 kmp_info_t *th = __kmp_threads[gtid];
3309 kmp_team_t *team = th->th.th_team;
3310 kmp_disp_t *pr_buf = th->th.th_dispatch;
3311
3312 KA_TRACE(20,("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
3313 if( team->t.t_serialized ) {
3314 KA_TRACE(20,("__kmpc_doacross_fini() exit: serialized team %p\n", team));
3315 return; // nothing to do
3316 }
3317 num_done = KMP_TEST_THEN_INC64((kmp_int64*)pr_buf->th_doacross_info[1]) + 1;
3318 if( num_done == th->th.th_team_nproc ) {
3319 // we are the last thread, need to free shared resources
3320 int idx = pr_buf->th_doacross_buf_idx - 1;
Jonathan Peyton067325f2016-05-31 19:01:15 +00003321 dispatch_shared_info_t *sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003322 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done);
3323 KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done);
3324 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
3325 __kmp_thread_free(th, (void*)sh_buf->doacross_flags);
3326 sh_buf->doacross_flags = NULL;
3327 sh_buf->doacross_num_done = 0;
Jonathan Peyton067325f2016-05-31 19:01:15 +00003328 sh_buf->doacross_buf_idx += __kmp_dispatch_num_buffers; // free buffer for future re-use
Jonathan Peyton71909c52016-03-02 22:42:06 +00003329 }
3330 // free private resources (need to keep buffer index forever)
3331 __kmp_thread_free(th, (void*)pr_buf->th_doacross_info);
3332 pr_buf->th_doacross_info = NULL;
3333 KA_TRACE(20,("__kmpc_doacross_fini() exit: T#%d\n", gtid));
3334}
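/*
 * Illustrative sketch (hypothetical codegen, not part of this file) of the
 * full doacross call sequence for a doubly nested "ordered(2)" loop with
 * "depend(sink: i-1,j)" and "depend(source)". Bounds are passed inclusive,
 * as documented for __kmpc_doacross_init; lb/ub denote this thread's chunk
 * of the outer dimension and are assumptions.
 */
#if 0
static void doacross_body(ident_t *loc, int gtid, long long lb, long long ub) {
    struct kmp_dim dims[2] = { { 0, 99, 1 },    // dim 0: lo, up, st
                               { 0, 49, 1 } };  // dim 1: lo, up, st
    __kmpc_doacross_init(loc, gtid, 2, dims);
    for (long long i = lb; i <= ub; ++i) {
        for (long long j = 0; j <= 49; ++j) {
            long long sink[2] = { i - 1, j };   // depend(sink: i-1, j)
            if (i > 0)
                __kmpc_doacross_wait(loc, gtid, sink);
            /* ... loop body ... */
            long long src[2] = { i, j };        // depend(source)
            __kmpc_doacross_post(loc, gtid, src);
        }
    }
    __kmpc_doacross_fini(loc, gtid);
}
#endif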
3335#endif
3336
Jim Cownie5e8470a2013-09-27 10:38:44 +00003337// end of file //
3338