/*
 * kmp_csupport.c -- kfront linkage support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "omp.h"        /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_error.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* flags will be used in future, e.g., to implement */
/* openmp_strict library restrictions */

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 *
 */
void
__kmpc_begin(ident_t *loc, kmp_int32 flags)
{
    // By default __kmp_ignore_mppbeg() returns TRUE.
    if (__kmp_ignore_mppbeg() == FALSE) {
        __kmp_internal_begin();

        KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
    }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This call is also optional; even if made it
 * will do nothing unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
 */
void
__kmpc_end(ident_t *loc)
{
    // By default, __kmp_ignore_mppend() returns TRUE which makes the __kmpc_end() call a no-op.
    // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
    // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
    // will unregister this root (it can cause library shut down).
    if (__kmp_ignore_mppend() == FALSE) {
        KC_TRACE( 10, ("__kmpc_end: called\n" ) );
        KA_TRACE( 30, ("__kmpc_end\n" ));

        __kmp_internal_end_thread( -1 );
    }
}
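
/* Illustrative sketch (not part of the runtime): a translation unit may
 * bracket its OpenMP usage with these optional entry points. The ident_t
 * initializer below is a hypothetical example; real compilers emit their
 * own location records.
 * @code
 * static ident_t loc_example = { 0, KMP_IDENT_KMPC, 0, 0, ";file.c;main;1;1;;" };
 *
 * int main(void) {
 *     __kmpc_begin(&loc_example, 0);  // optional; implied by first runtime use
 *     // ... code containing OpenMP constructs ...
 *     __kmpc_end(&loc_example);       // optional; honored only if KMP_IGNORE_MPPEND=0
 *     return 0;
 * }
 * @endcode
 */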

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).

*/
kmp_int32
__kmpc_global_thread_num(ident_t *loc)
{
    kmp_int32 gtid = __kmp_entry_gtid();

    KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );

    return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime. That is
not a number that can be determined by any OpenMP standard calls, since the library may be
called from more than one non-OpenMP thread, and this reflects the total over all such calls.
Similarly, the runtime maintains underlying threads even when they are not active (since the cost
of creating and destroying OS threads is high); this call counts all such threads even if they are not
waiting for work.
*/
kmp_int32
__kmpc_global_num_threads(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );

    return TCR_4(__kmp_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel construct.

*/
kmp_int32
__kmpc_bound_thread_num(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
    return __kmp_tid_from_gtid( __kmp_entry_gtid() );
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32
__kmpc_bound_num_threads(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );

    return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32
__kmpc_ok_to_fork(ident_t *loc)
{
#ifndef KMP_DEBUG

    return TRUE;

#else

    const char *semi2;
    const char *semi3;
    int line_no;

    if (__kmp_par_range == 0) {
        return TRUE;
    }
    semi2 = loc->psource;
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2 + 1, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    if (__kmp_par_range_filename[0]) {
        const char *name = semi2 - 1;
        while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
            name--;
        }
        if ((*name == '/') || (*name == ';')) {
            name++;
        }
        if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
            return __kmp_par_range < 0;
        }
    }
    semi3 = strchr(semi2 + 1, ';');
    if (__kmp_par_range_routine[0]) {
        if ((semi3 != NULL) && (semi3 > semi2)
          && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
            return __kmp_par_range < 0;
        }
    }
    if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
        if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
            return __kmp_par_range > 0;
        }
        return __kmp_par_range < 0;
    }
    return TRUE;

#endif /* KMP_DEBUG */

}
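
/* Explanatory sketch of the parsing above (not normative): the ident_t
 * psource string is a semicolon-separated record, nominally of the form
 *     ";filename;routine;line;column;;"
 * so semi2 ends up at the ';' terminating the filename field and semi3 at
 * the ';' terminating the routine field. A hypothetical example:
 * @code
 * // psource = ";foo.c;bar;42;3;;"
 * //             ^file ^routine
 * // KMP_PAR_RANGE filtering compares "foo.c", "bar", and line 42 against
 * // __kmp_par_range_filename / __kmp_par_range_routine / _lb / _ub.
 * @endcode
 */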

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if not.
*/
kmp_int32
__kmpc_in_parallel( ident_t *loc )
{
    return __kmp_entry_thread() -> th.th_root -> r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void
__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
      global_tid, num_threads ) );

    __kmp_push_num_threads( loc, global_tid, num_threads );
}

void
__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
{
    KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );

    /* the num_threads are automatically popped */
}
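
/* Illustrative sketch (not part of the runtime): with a num_threads clause,
 *     #pragma omp parallel num_threads(4)
 * the compiler emits a push immediately before the fork. loc_example and
 * __outlined$body are hypothetical names.
 * @code
 * kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
 * __kmpc_push_num_threads(&loc_example, gtid, 4);
 * __kmpc_fork_call(&loc_example, 0, (kmpc_micro)__outlined$body);
 * @endcode
 */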


#if OMP_40_ENABLED

void
__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
{
    KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
      global_tid, proc_bind ) );

    __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
}

#endif /* OMP_40_ENABLED */


/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void
__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
    int inParallel = __kmpc_in_parallel(loc);
    if (inParallel)
    {
        KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
    }
    else
    {
        KMP_COUNT_BLOCK(OMP_PARALLEL);
    }
#endif

    // maybe to save thr_state is enough here
    {
        va_list ap;
        va_start( ap, microtask );

#if OMPT_SUPPORT
        int tid = __kmp_tid_from_gtid( gtid );
        kmp_info_t *master_th = __kmp_threads[ gtid ];
        kmp_team_t *parent_team = master_th->th.th_team;
        if (ompt_enabled) {
            parent_team->t.t_implicit_task_taskdata[tid].
                ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
        }
#endif

#if INCLUDE_SSC_MARKS
        SSC_MARK_FORKING();
#endif
        __kmp_fork_call( loc, gtid, fork_context_intel,
                argc,
#if OMPT_SUPPORT
                VOLATILE_CAST(void *) microtask,      // "unwrapped" task
#endif
                VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                VOLATILE_CAST(launch_t)    __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                &ap
#else
                ap
#endif
                );
#if INCLUDE_SSC_MARKS
        SSC_MARK_JOINING();
#endif
        __kmp_join_call( loc, gtid
#if OMPT_SUPPORT
            , fork_context_intel
#endif
        );

        va_end( ap );

#if OMPT_SUPPORT
        if (ompt_enabled) {
            parent_team->t.t_implicit_task_taskdata[tid].
                ompt_task_info.frame.reenter_runtime_frame = 0;
        }
#endif
    }
}
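
/* Illustrative sketch (not part of the runtime): a compiler typically lowers
 *     #pragma omp parallel
 *     { body(&x); }
 * into an outlined microtask plus a call to __kmpc_fork_call. The names
 * __outlined$body and loc_example below are hypothetical.
 * @code
 * void __outlined$body(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *x) {
 *     body(x);   // runs once in each thread of the team
 * }
 *
 * // at the parallel construct, with one shared variable x:
 * __kmpc_fork_call(&loc_example, 1, (kmpc_micro)__outlined$body, &x);
 * @endcode
 */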

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void
__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
      global_tid, num_teams, num_threads ) );

    __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void
__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    int gtid = __kmp_entry_gtid();
    kmp_info_t *this_thr = __kmp_threads[ gtid ];
    va_list ap;
    va_start( ap, microtask );

    KMP_COUNT_BLOCK(OMP_TEAMS);

    // remember teams entry point and nesting level
    this_thr->th.th_teams_microtask = microtask;
    this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
    kmp_team_t *parent_team = this_thr->th.th_team;
    int tid = __kmp_tid_from_gtid( gtid );
    if (ompt_enabled) {
        parent_team->t.t_implicit_task_taskdata[tid].
            ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
    }
#endif

    // check if __kmpc_push_num_teams called, set default number of teams otherwise
    if ( this_thr->th.th_teams_size.nteams == 0 ) {
        __kmp_push_num_teams( loc, gtid, 0, 0 );
    }
    KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

    __kmp_fork_call( loc, gtid, fork_context_intel,
            argc,
#if OMPT_SUPPORT
            VOLATILE_CAST(void *) microtask,               // "unwrapped" task
#endif
            VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
            VOLATILE_CAST(launch_t)    __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            &ap
#else
            ap
#endif
            );
    __kmp_join_call( loc, gtid
#if OMPT_SUPPORT
        , fork_context_intel
#endif
    );

#if OMPT_SUPPORT
    if (ompt_enabled) {
        parent_team->t.t_implicit_task_taskdata[tid].
            ompt_task_info.frame.reenter_runtime_frame = NULL;
    }
#endif

    this_thr->th.th_teams_microtask = NULL;
    this_thr->th.th_teams_level = 0;
    *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
    va_end( ap );
}
#endif /* OMP_40_ENABLED */
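
/* Illustrative sketch (not part of the runtime): a teams construct such as
 *     #pragma omp teams num_teams(2) thread_limit(8)
 * is lowered into a push followed by a teams fork. loc_example and
 * __outlined$teams are hypothetical names.
 * @code
 * kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
 * __kmpc_push_num_teams(&loc_example, gtid, 2, 8);
 * __kmpc_fork_teams(&loc_example, 0, (kmpc_micro)__outlined$teams);
 * @endcode
 */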


//
// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied.  I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
//
int
__kmpc_invoke_task_func( int gtid )
{
    return __kmp_invoke_task_func( gtid );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
                                                 * kmp_fork_call since the tasks to be done are similar in each case.
                                                 */
}
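
/* Illustrative sketch (not part of the runtime): for
 *     #pragma omp parallel if (cond)
 * the compiler can branch at run time between the real fork and the
 * serialized path. The names below are hypothetical.
 * @code
 * kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
 * if (cond) {
 *     __kmpc_fork_call(&loc_example, 1, (kmpc_micro)__outlined$body, &x);
 * } else {
 *     __kmpc_serialized_parallel(&loc_example, gtid);
 *     kmp_int32 zero = 0;
 *     __outlined$body(&gtid, &zero, &x);  // run the body inline on one thread
 *     __kmpc_end_serialized_parallel(&loc_example, gtid);
 * }
 * @endcode
 */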

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void
__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_internal_control_t *top;
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );

    /* skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    // Not autopar code
    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    #if OMP_45_ENABLED
    kmp_task_team_t * task_team = this_thr->th.th_task_team;

    // we need to wait for the proxy tasks before finishing the thread
    if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
        __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL) ); // is an ITT object needed here?
    #endif

    KMP_MB();
    KMP_DEBUG_ASSERT( serial_team );
    KMP_ASSERT(       serial_team -> t.t_serialized );
    KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
    KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );

    /* If necessary, pop the internal control stack values and replace the team values */
    top = serial_team -> t.t_control_stack_top;
    if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
        copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
        serial_team -> t.t_control_stack_top = top -> next;
        __kmp_free(top);
    }

    //if( serial_team -> t.t_serialized > 1 )
    serial_team -> t.t_level--;

    /* pop dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
    {
        dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer =
            serial_team->t.t_dispatch->th_disp_buffer->next;
        __kmp_free( disp_buffer );
    }

    -- serial_team -> t.t_serialized;
    if ( serial_team -> t.t_serialized == 0 ) {

        /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
        if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
            __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
        }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

        this_thr -> th.th_team           = serial_team -> t.t_parent;
        this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;

        /* restore values cached in the thread */
        this_thr -> th.th_team_nproc      = serial_team -> t.t_parent -> t.t_nproc;      /* JPH */
        this_thr -> th.th_team_master     = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
        this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;

        /* TODO the below shouldn't need to be adjusted for serialized teams */
        this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
            t.t_dispatch[ serial_team -> t.t_master_tid ];

        __kmp_pop_current_task_from_thread( this_thr );

        KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
        this_thr -> th.th_current_task -> td_flags.executing = 1;

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            // Copy the task team from the new child / old parent team to the thread.
            this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
                            global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
        }
    } else {
        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
                            global_tid, serial_team, serial_team -> t.t_serialized ) );
        }
    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_parallel( global_tid, NULL );
}

/*!
@ingroup SYNCHRONIZATION
@param loc  source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void
__kmpc_flush(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_flush: called\n" ) );

    /* need explicit __mf() here since use volatile instead in library */
    KMP_MB();       /* Flush all pending memory write invalidates. */

    #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
        #if KMP_MIC
            // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
            // We shouldn't need it, though, since the ABI rules require that
            // * If the compiler generates NGO stores it also generates the fence
            // * If users hand-code NGO stores they should insert the fence
            // therefore no incomplete unordered stores should be visible.
        #else
            // C74404
            // This is to address non-temporal store instructions (sfence needed).
            // The clflush instruction is addressed as well (mfence needed).
            // Probably the non-temporal load movntdqa instruction should also be addressed.
            // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2 capable.
            if ( ! __kmp_cpuinfo.initialized ) {
                __kmp_query_cpuid( & __kmp_cpuinfo );
            }; // if
            if ( ! __kmp_cpuinfo.sse2 ) {
                // CPU cannot execute SSE2 instructions.
            } else {
                #if KMP_COMPILER_ICC
                _mm_mfence();
                #elif KMP_COMPILER_MSVC
                MemoryBarrier();
                #else
                __sync_synchronize();
                #endif // KMP_COMPILER_ICC
            }; // if
        #endif // KMP_MIC
    #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        // Nothing to see here; move along.
    #elif KMP_ARCH_PPC64
        // Nothing needed here (we have a real MB above).
        #if KMP_OS_CNK
            // The flushing thread needs to yield here; this prevents a
            // busy-waiting thread from saturating the pipeline. flush is
            // often used in loops like this:
            //   while (!flag) {
            //     #pragma omp flush(flag)
            //   }
            // and adding the yield here is good for at least a 10x speedup
            // when running >2 threads per core (on the NAS LU benchmark).
            __kmp_yield(TRUE);
        #endif
    #else
        #error Unknown or unsupported architecture
    #endif

}
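
/* Illustrative sketch (not part of the runtime): a bare
 *     #pragma omp flush
 * lowers to a single runtime call; loc_example is a hypothetical name.
 * @code
 * __kmpc_flush(&loc_example);
 * @endcode
 */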

/* -------------------------------------------------------------------------- */

/* -------------------------------------------------------------------------- */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void
__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_BARRIER);
    KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {
        if ( loc == 0 ) {
            KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
        }; // if

        __kmp_check_barrier( global_tid, ct_barrier, loc );
    }

    __kmp_threads[ global_tid ]->th.th_ident = loc;
    // TODO: explicit barrier_wait_id:
    //   this function is called when 'barrier' directive is present or
    //   implicit barrier at the end of a worksharing construct.
    // 1) better to add a per-thread barrier counter to a thread data structure
    // 2) set to 0 when a new team is created
    // 3) no sync is required

    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
}
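
/* Illustrative sketch (not part of the runtime): an explicit
 *     #pragma omp barrier
 * lowers to a single call; loc_example is a hypothetical name.
 * @code
 * kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
 * __kmpc_barrier(&loc_example, gtid);
 * @endcode
 */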

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32
__kmpc_master(ident_t *loc, kmp_int32 global_tid)
{
    int status = 0;

    KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    if( KMP_MASTER_GTID( global_tid )) {
        KMP_COUNT_BLOCK(OMP_MASTER);
        KMP_PUSH_PARTITIONED_TIMER(OMP_master);
        status = 1;
    }

#if OMPT_SUPPORT && OMPT_TRACE
    if (status) {
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
            kmp_info_t *this_thr = __kmp_threads[ global_tid ];
            kmp_team_t *team = this_thr -> th.th_team;

            int tid = __kmp_tid_from_gtid( global_tid );
            ompt_callbacks.ompt_callback(ompt_event_master_begin)(
                team->t.ompt_team_info.parallel_id,
                team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
        }
    }
#endif

    if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
#else
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL );
#endif
    }

    return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the thread
that executes the <tt>master</tt> region.
*/
void
__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
{
    KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );

    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
    KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team = this_thr -> th.th_team;
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_master_end)) {
        int tid = __kmp_tid_from_gtid( global_tid );
        ompt_callbacks.ompt_callback(ompt_event_master_end)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif

    if ( __kmp_env_consistency_check ) {
        if( global_tid < 0 )
            KMP_WARNING( ThreadIdentInvalid );

        if( KMP_MASTER_GTID( global_tid ))
            __kmp_pop_sync( global_tid, ct_master, loc );
    }
}
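
/* Illustrative sketch (not part of the runtime): inside a parallel region,
 *     #pragma omp master
 *     { work(); }
 * lowers to a guarded call pair; loc_example is a hypothetical name.
 * @code
 * if (__kmpc_master(&loc_example, gtid)) {
 *     work();                               // executed by the master thread only
 *     __kmpc_end_master(&loc_example, gtid);
 * }
 * @endcode
 */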

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

#if USE_ITT_BUILD
    __kmp_itt_ordered_prep( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        /* OMPT state update */
        th->th.ompt_thread_info.wait_id = (uint64_t) loc;
        th->th.ompt_thread_info.state = ompt_state_wait_ordered;

        /* OMPT event callback */
        if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

    if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
        (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_deo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        /* OMPT state update */
        th->th.ompt_thread_info.state = ompt_state_work_parallel;
        th->th.ompt_thread_info.wait_id = 0;

        /* OMPT event callback */
        if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

#if USE_ITT_BUILD
    __kmp_itt_ordered_start( gtid );
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;

    KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );

#if USE_ITT_BUILD
    __kmp_itt_ordered_end( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

    if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
        (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_dxo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
            th->th.ompt_thread_info.wait_id);
    }
#endif
}
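
/* Illustrative sketch (not part of the runtime): within an ordered loop body,
 *     #pragma omp ordered
 *     { emit(i); }
 * lowers to a bracketing call pair; loc_example is a hypothetical name.
 * @code
 * __kmpc_ordered(&loc_example, gtid);
 * emit(i);                            // executed in iteration order
 * __kmpc_end_ordered(&loc_example, gtid);
 * @endcode
 */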

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
{
    // Pointer to the allocated indirect lock is written to crit, while indexing is ignored.
    void *idx;
    kmp_indirect_lock_t **lck;
    lck = (kmp_indirect_lock_t **)crit;
    kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
    KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
    KMP_SET_I_LOCK_LOCATION(ilk, loc);
    KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
    KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(ilk->lock, loc);
#endif
    int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
    if (status == 0) {
#if USE_ITT_BUILD
        __kmp_itt_critical_destroyed(ilk->lock);
#endif
        // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit.
        //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
    }
    KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) {                                                             \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                                        \
    if (l->lk.poll != KMP_LOCK_FREE(tas) ||                                                            \
        ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
        kmp_uint32 spins;                                                                              \
        KMP_FSYNC_PREPARE(l);                                                                          \
        KMP_INIT_YIELD(spins);                                                                         \
        if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {                  \
            KMP_YIELD(TRUE);                                                                           \
        } else {                                                                                       \
            KMP_YIELD_SPIN(spins);                                                                     \
        }                                                                                              \
        kmp_backoff_t backoff = __kmp_spin_backoff_params;                                             \
        while (l->lk.poll != KMP_LOCK_FREE(tas) ||                                                     \
            ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
            __kmp_spin_backoff(&backoff);                                                              \
            if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {              \
                KMP_YIELD(TRUE);                                                                       \
            } else {                                                                                   \
                KMP_YIELD_SPIN(spins);                                                                 \
            }                                                                                          \
        }                                                                                              \
    }                                                                                                  \
    KMP_FSYNC_ACQUIRED(l);                                                                             \
}

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) {                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                                        \
    rc = l->lk.poll == KMP_LOCK_FREE(tas) &&                                                           \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas));   \
}

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) {                                                             \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas));                                      \
    KMP_MB();                                                                                          \
}

#if KMP_USE_FUTEX

# include <unistd.h>
# include <sys/syscall.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE 1
# endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) {                                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                                  \
    kmp_int32 gtid_code = (gtid+1) << 1;                                                               \
    KMP_MB();                                                                                          \
    KMP_FSYNC_PREPARE(ftx);                                                                            \
    kmp_int32 poll_val;                                                                                \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),              \
            KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {                               \
        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                                                 \
        if (!cond) {                                                                                   \
            if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
                continue;                                                                              \
            }                                                                                          \
            poll_val |= KMP_LOCK_BUSY(1, futex);                                                       \
        }                                                                                              \
        kmp_int32 rc;                                                                                  \
        if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) {   \
            continue;                                                                                  \
        }                                                                                              \
        gtid_code |= 1;                                                                                \
    }                                                                                                  \
    KMP_FSYNC_ACQUIRED(ftx);                                                                           \
}

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) {                                                          \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                                  \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1)) { \
        KMP_FSYNC_ACQUIRED(ftx);                                                                       \
        rc = TRUE;                                                                                     \
    } else {                                                                                           \
        rc = FALSE;                                                                                    \
    }                                                                                                  \
}

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) {                                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                                  \
    KMP_MB();                                                                                          \
    KMP_FSYNC_RELEASING(ftx);                                                                          \
    kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));                      \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                                                \
        syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);      \
    }                                                                                                  \
    KMP_MB();                                                                                          \
    KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));                 \
}

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p
__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
{
    kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

    //
    // Because of the double-check, the following load
    // doesn't need to be volatile.
    //
    kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );

    if ( lck == NULL ) {
        void * idx;

        // Allocate & initialize the lock.
        // Remember allocated locks in table in order to free them in __kmp_cleanup()
        lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
        __kmp_init_user_lock_with_checks( lck );
        __kmp_set_user_lock_location( lck, loc );
#if USE_ITT_BUILD
        __kmp_itt_critical_creating( lck );
            // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
            // lock. It is the only place where we can guarantee it. There are chances the lock will be
            // destroyed with no usage, but it is not a problem, because this is not a real event seen
            // by the user but rather setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

        //
        // Use a cmpxchg instruction to slam the start of the critical
        // section with the lock pointer.  If another thread beat us
        // to it, deallocate the lock, and use the lock that the other
        // thread allocated.
        //
        int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );

        if ( status == 0 ) {
            // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
            __kmp_itt_critical_destroyed( lck );
                // Let ITT know the lock is destroyed and the same memory location may be reused for
                // another purpose.
#endif /* USE_ITT_BUILD */
            __kmp_destroy_user_lock_with_checks( lck );
            __kmp_user_lock_free( &idx, gtid, lck );
            lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
            KMP_DEBUG_ASSERT( lck != NULL );
        }
    }
    return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void
__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
{
#if KMP_USE_DYNAMIC_LOCK
    __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
    KMP_COUNT_BLOCK(OMP_CRITICAL);
    KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

    //TODO: add THR_OVHD_STATE

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    /* since the critical directive binds to all threads, not just
     * the current team we have to check this even if we are in a
     * serialized team */
    /* also, even if we are the uber thread, we still have to conduct the lock,
     * as we have to contend with sibling threads */

#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_acquire_user_lock_with_checks( lck, global_tid );

#if USE_ITT_BUILD
    __kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */

    KMP_START_EXPLICIT_TIMER(OMP_critical);
    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
#endif // KMP_USE_DYNAMIC_LOCK
}
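
/* Illustrative sketch (not part of the runtime): a named critical section
 *     #pragma omp critical (name)
 *     { update(); }
 * lowers to a call pair sharing one statically allocated kmp_critical_name;
 * the variable name below is hypothetical.
 * @code
 * static kmp_critical_name crit$name;   // zero-initialized lock identity
 *
 * __kmpc_critical(&loc_example, gtid, &crit$name);
 * update();                             // mutually exclusive across all threads
 * __kmpc_end_critical(&loc_example, gtid, &crit$name);
 * @endcode
 */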

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t
__kmp_map_hint_to_lock(uintptr_t hint)
{
#if KMP_USE_TSX
# define KMP_TSX_LOCK(seq) lockseq_##seq
#else
# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
# define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
# define KMP_CPUINFO_RTM 0
#endif

    // Hints that do not require further logic
    if (hint & kmp_lock_hint_hle)
        return KMP_TSX_LOCK(hle);
    if (hint & kmp_lock_hint_rtm)
        return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
    if (hint & kmp_lock_hint_adaptive)
        return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;

    // Rule out conflicting hints first by returning the default lock
    if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
        return __kmp_user_lock_seq;
    if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
        return __kmp_user_lock_seq;

    // Do not even consider speculation when it appears to be contended
    if (hint & omp_lock_hint_contended)
        return lockseq_queuing;

    // Uncontended lock without speculation
    if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
        return lockseq_tas;

    // HLE lock for speculation
    if (hint & omp_lock_hint_speculative)
        return KMP_TSX_LOCK(hle);

    return __kmp_user_lock_seq;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section,
or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation.
This function blocks until the executing thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void
__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
{
    KMP_COUNT_BLOCK(OMP_CRITICAL);
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

    kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
    // Check if it is initialized.
    if (*lk == 0) {
        kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
        if (KMP_IS_D_LOCK(lckseq)) {
            KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
        } else {
            __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
        }
    }
    // Branch for accessing the actual lock object and set operation. This branching is inevitable since
    // this lock initialization does not follow the normal dispatch path (lock table is not used).
    if (KMP_EXTRACT_D_TAG(lk) != 0) {
        lck = (kmp_user_lock_p)lk;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
# if KMP_USE_INLINED_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_INLINED_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
        KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
    }

#if USE_ITT_BUILD
    __kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */

    KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK
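
/* Illustrative sketch (not part of the runtime): the OpenMP 4.5 form
 *     #pragma omp critical (name) hint(omp_lock_hint_speculative)
 * lowers to the hinted entry point instead of __kmpc_critical; the variable
 * name below is hypothetical.
 * @code
 * static kmp_critical_name crit$name;
 *
 * __kmpc_critical_with_hint(&loc_example, gtid, &crit$name, omp_lock_hint_speculative);
 * update();
 * __kmpc_end_critical(&loc_example, gtid, &crit$name);
 * @endcode
 */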

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void
__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
{
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));

#if KMP_USE_DYNAMIC_LOCK
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        KMP_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
# if KMP_USE_INLINED_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_RELEASE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_INLINED_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        KMP_ASSERT(ilk != NULL);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
        KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
    }

    KMP_ASSERT(lck != NULL);

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

#if USE_ITT_BUILD
    __kmp_itt_critical_releasing( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_release_user_lock_with_checks( lck, global_tid );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
        ompt_callbacks.ompt_callback(ompt_event_release_critical)(
            (uint64_t) lck);
    }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
    KMP_POP_PARTITIONED_TIMER();
    KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside this function.
*/
kmp_int32
__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
{
    int status;

    KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check )
        __kmp_check_barrier( global_tid, ct_barrier, loc );

#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );

    return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void
__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
{
    KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));

    __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
}
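
/* Illustrative sketch (not part of the runtime): a barrier followed by a
 * master block can be lowered through this pair; loc_example is a
 * hypothetical name.
 * @code
 * if (__kmpc_barrier_master(&loc_example, gtid)) {
 *     master_work();                                  // master thread only
 *     __kmpc_end_barrier_master(&loc_example, gtid);  // releases waiting threads
 * }
 * @endcode
 */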
1390
1391/*!
1392@ingroup SYNCHRONIZATION
1393@param loc source location information
1394@param global_tid thread id.
1395@return one if the thread should execute the master block, zero otherwise
1396
1397Start execution of a combined barrier and master(nowait) construct.
1398The barrier is executed inside this function.
1399There is no equivalent "end" function, since the nowait variant leaves no threads waiting at the barrier that would need to be released.
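
For illustration, generated code might look like this (a sketch; <tt>loc</tt>
and <tt>gtid</tt> are placeholders):
@code
if (__kmpc_barrier_master_nowait(&loc, gtid)) {
    // code executed only by the master thread; no "end" call is required
}
@endcode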
1400*/
1401kmp_int32
1402__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1403{
1404 kmp_int32 ret;
1405
1406 KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1407
1408 if (! TCR_4(__kmp_init_parallel))
1409 __kmp_parallel_initialize();
1410
1411 if ( __kmp_env_consistency_check ) {
1412 if ( loc == 0 ) {
1413 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1414 }
1415 __kmp_check_barrier( global_tid, ct_barrier, loc );
1416 }
1417
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001418#if USE_ITT_NOTIFY
1419 __kmp_threads[global_tid]->th.th_ident = loc;
1420#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001421 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1422
1423 ret = __kmpc_master (loc, global_tid);
1424
1425 if ( __kmp_env_consistency_check ) {
1426 /* there is no __kmpc_end_master call, so the (stats) */
1427 /* actions of __kmpc_end_master are performed here */
1428
1429 if ( global_tid < 0 ) {
1430 KMP_WARNING( ThreadIdentInvalid );
1431 }
1432 if (ret) {
1433 /* only one thread should do the pop since only */
1434 /* one did the push (see __kmpc_master()) */
1435
1436 __kmp_pop_sync( global_tid, ct_master, loc );
1437 }
1438 }
1439
1440 return (ret);
1441}
1442
1443/* The BARRIER for a SINGLE process section is always explicit */
1444/*!
1445@ingroup WORK_SHARING
1446@param loc source location information
1447@param global_tid global thread number
1448@return One if this thread should execute the single construct, zero otherwise.
1449
1450Test whether to execute a <tt>single</tt> construct.
1451There are no implicit barriers in the two "single" calls; rather, the compiler should
1452introduce an explicit barrier if it is required.
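
A possible lowering of a <tt>single</tt> construct (a sketch; the trailing
barrier is emitted only when the construct carries no nowait clause):
@code
if (__kmpc_single(&loc, gtid)) {
    // body of the single region
    __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // omitted for single ... nowait
@endcode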
1453*/
1454
1455kmp_int32
1456__kmpc_single(ident_t *loc, kmp_int32 global_tid)
1457{
1458 kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
Jonathan Peyton30138252016-03-03 21:21:05 +00001459
1460 if (rc) {
1461 // We are going to execute the single statement, so we should count it.
1462 KMP_COUNT_BLOCK(OMP_SINGLE);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001463 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001464 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001465
1466#if OMPT_SUPPORT && OMPT_TRACE
1467 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1468 kmp_team_t *team = this_thr -> th.th_team;
1469 int tid = __kmp_tid_from_gtid( global_tid );
1470
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001471 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001472 if (rc) {
1473 if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
1474 ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
1475 team->t.ompt_team_info.parallel_id,
1476 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
1477 team->t.ompt_team_info.microtask);
1478 }
1479 } else {
1480 if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
1481 ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
1482 team->t.ompt_team_info.parallel_id,
1483 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1484 }
1485 this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
1486 }
1487 }
1488#endif
1489
Jim Cownie5e8470a2013-09-27 10:38:44 +00001490 return rc;
1491}
1492
1493/*!
1494@ingroup WORK_SHARING
1495@param loc source location information
1496@param global_tid global thread number
1497
1498Mark the end of a <tt>single</tt> construct. This function should
1499only be called by the thread that executed the block of code protected
1500by the `single` construct.
1501*/
1502void
1503__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1504{
1505 __kmp_exit_single( global_tid );
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001506 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001507
1508#if OMPT_SUPPORT && OMPT_TRACE
1509 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1510 kmp_team_t *team = this_thr -> th.th_team;
1511 int tid = __kmp_tid_from_gtid( global_tid );
1512
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001513 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001514 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
1515 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
1516 team->t.ompt_team_info.parallel_id,
1517 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1518 }
1519#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001520}
1521
1522/*!
1523@ingroup WORK_SHARING
1524@param loc Source location
1525@param global_tid Global thread id
1526
1527Mark the end of a statically scheduled loop.
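
A sketch of the surrounding call sequence for a statically scheduled loop
(illustrative; the schedule kind, bounds and chunk values are placeholders):
@code
kmp_int32 lower = 0, upper = N - 1, stride = 1, last = 0;
__kmpc_for_static_init_4(&loc, gtid, kmp_sch_static,
                         &last, &lower, &upper, &stride, 1, 0);
for (kmp_int32 i = lower; i <= upper; ++i) {
    // loop body, executed on this thread's chunk of the iteration space
}
__kmpc_for_static_fini(&loc, gtid);
@endcode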
1528*/
1529void
1530__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1531{
1532 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1533
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001534#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001535 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001536 ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
Jonas Hahnfelde46a4942016-03-24 12:52:20 +00001537 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1538 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001539 ompt_callbacks.ompt_callback(ompt_event_loop_end)(
Jonas Hahnfelde46a4942016-03-24 12:52:20 +00001540 team_info->parallel_id, task_info->task_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001541 }
1542#endif
1543
Jim Cownie5e8470a2013-09-27 10:38:44 +00001544 if ( __kmp_env_consistency_check )
1545 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1546}
1547
1548/*
1549 * User routines which take C-style arguments (call by value)
1550 * different from the Fortran equivalent routines
1551 */
1552
1553void
1554ompc_set_num_threads( int arg )
1555{
1556// !!!!! TODO: check the per-task binding
1557 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1558}
1559
1560void
1561ompc_set_dynamic( int flag )
1562{
1563 kmp_info_t *thread;
1564
1565 /* For the thread-private implementation of the internal controls */
1566 thread = __kmp_entry_thread();
1567
1568 __kmp_save_internal_controls( thread );
1569
1570 set__dynamic( thread, flag ? TRUE : FALSE );
1571}
1572
1573void
1574ompc_set_nested( int flag )
1575{
1576 kmp_info_t *thread;
1577
1578 /* For the thread-private internal controls implementation */
1579 thread = __kmp_entry_thread();
1580
1581 __kmp_save_internal_controls( thread );
1582
1583 set__nested( thread, flag ? TRUE : FALSE );
1584}
1585
Jim Cownie5e8470a2013-09-27 10:38:44 +00001586void
1587ompc_set_max_active_levels( int max_active_levels )
1588{
1589 /* TO DO */
1590 /* we want per-task implementation of this internal control */
1591
1592 /* For the per-thread internal controls implementation */
1593 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1594}
1595
1596void
1597ompc_set_schedule( omp_sched_t kind, int modifier )
1598{
1599// !!!!! TODO: check the per-task binding
1600 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1601}
1602
1603int
1604ompc_get_ancestor_thread_num( int level )
1605{
1606 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1607}
1608
1609int
1610ompc_get_team_size( int level )
1611{
1612 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1613}
1614
Jim Cownie5e8470a2013-09-27 10:38:44 +00001615void
1616kmpc_set_stacksize( int arg )
1617{
1618 // __kmp_aux_set_stacksize initializes the library if needed
1619 __kmp_aux_set_stacksize( arg );
1620}
1621
1622void
1623kmpc_set_stacksize_s( size_t arg )
1624{
1625 // __kmp_aux_set_stacksize initializes the library if needed
1626 __kmp_aux_set_stacksize( arg );
1627}
1628
1629void
1630kmpc_set_blocktime( int arg )
1631{
1632 int gtid, tid;
1633 kmp_info_t *thread;
1634
1635 gtid = __kmp_entry_gtid();
1636 tid = __kmp_tid_from_gtid(gtid);
1637 thread = __kmp_thread_from_gtid(gtid);
1638
1639 __kmp_aux_set_blocktime( arg, thread, tid );
1640}
1641
1642void
1643kmpc_set_library( int arg )
1644{
1645 // __kmp_user_set_library initializes the library if needed
1646 __kmp_user_set_library( (enum library_type)arg );
1647}
1648
1649void
1650kmpc_set_defaults( char const * str )
1651{
1652 // __kmp_aux_set_defaults initializes the library if needed
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001653 __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001654}
1655
Jonathan Peyton067325f2016-05-31 19:01:15 +00001656void
1657kmpc_set_disp_num_buffers( int arg )
1658{
1659 // ignore after initialization because some teams have already
1660 // allocated dispatch buffers
1661 if( __kmp_init_serial == 0 && arg > 0 )
1662 __kmp_dispatch_num_buffers = arg;
1663}
1664
Jim Cownie5e8470a2013-09-27 10:38:44 +00001665int
1666kmpc_set_affinity_mask_proc( int proc, void **mask )
1667{
Alp Toker98758b02014-03-02 04:12:06 +00001668#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001669 return -1;
1670#else
1671 if ( ! TCR_4(__kmp_init_middle) ) {
1672 __kmp_middle_initialize();
1673 }
1674 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1675#endif
1676}
1677
1678int
1679kmpc_unset_affinity_mask_proc( int proc, void **mask )
1680{
Alp Toker98758b02014-03-02 04:12:06 +00001681#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001682 return -1;
1683#else
1684 if ( ! TCR_4(__kmp_init_middle) ) {
1685 __kmp_middle_initialize();
1686 }
1687 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1688#endif
1689}
1690
1691int
1692kmpc_get_affinity_mask_proc( int proc, void **mask )
1693{
Alp Toker98758b02014-03-02 04:12:06 +00001694#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 return -1;
1696#else
1697 if ( ! TCR_4(__kmp_init_middle) ) {
1698 __kmp_middle_initialize();
1699 }
1700 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1701#endif
1702}
1703
Jim Cownie5e8470a2013-09-27 10:38:44 +00001704
1705/* -------------------------------------------------------------------------- */
1706/*!
1707@ingroup THREADPRIVATE
1708@param loc source location information
1709@param gtid global thread number
1710@param cpy_size size of the cpy_data buffer
1711@param cpy_data pointer to data to be copied
1712@param cpy_func helper function to call for copying data
1713@param didit flag variable: 1=single thread; 0=not single thread
1714
1715__kmpc_copyprivate implements the interface for the private data broadcast needed for
1716the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1717All threads participating in the parallel region call this routine.
1718One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1719and all other threads should have that variable set to 0.
1720All threads pass a pointer to a data buffer (cpy_data) that they have built.
1721
1722The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1723clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1724race conditions, so the code generation for the single region should avoid generating a barrier
1725after the call to @ref __kmpc_copyprivate.
1726
1727The <tt>gtid</tt> parameter is the global thread id for the current thread.
1728The <tt>loc</tt> parameter is a pointer to source location information.
1729
1730Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1731to a team-private location, then the other threads will each call the function pointed to by
1732the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer.
1733
1734The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1735and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1736the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1737to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1738
1739The interface to cpy_func is as follows:
1740@code
1741void cpy_func( void *destination, void *source )
1742@endcode
1743where void *destination is the cpy_data pointer for the thread being copied to
1744and void *source is the cpy_data pointer for the thread being copied from.
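
As an illustration, broadcasting a single <tt>int</tt> might be arranged as
follows (a sketch; <tt>copy_int</tt> stands for a compiler-generated helper):
@code
static void copy_int( void *dst, void *src ) { *(int *)dst = *(int *)src; }

int v;
kmp_int32 didit = __kmpc_single( &loc, gtid );
if (didit) {
    v = 42; // body of the single region
    __kmpc_end_single( &loc, gtid );
}
__kmpc_copyprivate( &loc, gtid, sizeof(int), &v, copy_int, didit );
@endcode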
1745*/
1746void
1747__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1748{
1749 void **data_ptr;
1750
1751 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1752
1753 KMP_MB();
1754
1755 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1756
1757 if ( __kmp_env_consistency_check ) {
1758 if ( loc == 0 ) {
1759 KMP_WARNING( ConstructIdentInvalid );
1760 }
1761 }
1762
1763 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1764
1765 if (didit) *data_ptr = cpy_data;
1766
1767 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001768#if USE_ITT_NOTIFY
1769 __kmp_threads[gtid]->th.th_ident = loc;
1770#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001771 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1772
1773 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1774
1775 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1776 /* Nesting checks are already handled by the single construct checks */
1777
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001778#if USE_ITT_NOTIFY
1779 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1780#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001781 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1782}
1783
1784/* -------------------------------------------------------------------------- */
1785
1786#define INIT_LOCK __kmp_init_user_lock_with_checks
1787#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1788#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1789#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1790#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1791#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1792#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1793#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1794#define TEST_LOCK __kmp_test_user_lock_with_checks
1795#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1796#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1797#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1798
1799
1800/*
1801 * TODO: Make check abort messages use location info & pass it
1802 * into with_checks routines
1803 */
1804
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001805#if KMP_USE_DYNAMIC_LOCK
1806
1807// internal lock initializer
1808static __forceinline void
1809__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1810{
1811 if (KMP_IS_D_LOCK(seq)) {
1812 KMP_INIT_D_LOCK(lock, seq);
1813#if USE_ITT_BUILD
1814 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
1815#endif
1816 } else {
1817 KMP_INIT_I_LOCK(lock, seq);
1818#if USE_ITT_BUILD
1819 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1820 __kmp_itt_lock_creating(ilk->lock, loc);
1821#endif
1822 }
1823}
1824
1825// internal nest lock initializer
1826static __forceinline void
1827__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1828{
1829#if KMP_USE_TSX
1830 // Don't have nested lock implementation for speculative locks
1831 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
1832 seq = __kmp_user_lock_seq;
1833#endif
1834 switch (seq) {
1835 case lockseq_tas:
1836 seq = lockseq_nested_tas;
1837 break;
1838#if KMP_USE_FUTEX
1839 case lockseq_futex:
1840 seq = lockseq_nested_futex;
1841 break;
1842#endif
1843 case lockseq_ticket:
1844 seq = lockseq_nested_ticket;
1845 break;
1846 case lockseq_queuing:
1847 seq = lockseq_nested_queuing;
1848 break;
1849 case lockseq_drdpa:
1850 seq = lockseq_nested_drdpa;
1851 break;
1852 default:
1853 seq = lockseq_nested_queuing;
1854 }
1855 KMP_INIT_I_LOCK(lock, seq);
1856#if USE_ITT_BUILD
1857 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1858 __kmp_itt_lock_creating(ilk->lock, loc);
1859#endif
1860}
1861
1862/* initialize the lock with a hint */
1863void
1864__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1865{
1866 KMP_DEBUG_ASSERT(__kmp_init_serial);
1867 if (__kmp_env_consistency_check && user_lock == NULL) {
1868 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
1869 }
1870
1871 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1872}
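
/* For example, a translated omp_init_lock_with_hint( &l, omp_lock_hint_speculative )
   call could reach this entry point as (a sketch; gtid handling simplified):
   __kmpc_init_lock_with_hint( NULL, gtid, (void **)&l, omp_lock_hint_speculative ); */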
1873
1874/* initialize the lock with a hint */
1875void
1876__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1877{
1878 KMP_DEBUG_ASSERT(__kmp_init_serial);
1879 if (__kmp_env_consistency_check && user_lock == NULL) {
1880 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
1881 }
1882
1883 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1884}
1885
1886#endif // KMP_USE_DYNAMIC_LOCK
1887
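/*
 * For reference, the user-visible OpenMP lock API reaches the entry points
 * below roughly as follows (a sketch; the library's wrappers supply the
 * gtid and pass a NULL location):
 *
 *   omp_init_lock( &l )    -> __kmpc_init_lock( NULL, gtid, (void **)&l )
 *   omp_set_lock( &l )     -> __kmpc_set_lock( NULL, gtid, (void **)&l )
 *   omp_unset_lock( &l )   -> __kmpc_unset_lock( NULL, gtid, (void **)&l )
 *   omp_destroy_lock( &l ) -> __kmpc_destroy_lock( NULL, gtid, (void **)&l )
 */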
Jim Cownie5e8470a2013-09-27 10:38:44 +00001888/* initialize the lock */
1889void
1890__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001891#if KMP_USE_DYNAMIC_LOCK
1892 KMP_DEBUG_ASSERT(__kmp_init_serial);
1893 if (__kmp_env_consistency_check && user_lock == NULL) {
1894 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1895 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001896 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001897
1898#else // KMP_USE_DYNAMIC_LOCK
1899
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900 static char const * const func = "omp_init_lock";
1901 kmp_user_lock_p lck;
1902 KMP_DEBUG_ASSERT( __kmp_init_serial );
1903
1904 if ( __kmp_env_consistency_check ) {
1905 if ( user_lock == NULL ) {
1906 KMP_FATAL( LockIsUninitialized, func );
1907 }
1908 }
1909
1910 KMP_CHECK_USER_LOCK_INIT();
1911
1912 if ( ( __kmp_user_lock_kind == lk_tas )
1913 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1914 lck = (kmp_user_lock_p)user_lock;
1915 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001916#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917 else if ( ( __kmp_user_lock_kind == lk_futex )
1918 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1919 lck = (kmp_user_lock_p)user_lock;
1920 }
1921#endif
1922 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001923 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001924 }
1925 INIT_LOCK( lck );
1926 __kmp_set_user_lock_location( lck, loc );
1927
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001928#if OMPT_SUPPORT && OMPT_TRACE
1929 if (ompt_enabled &&
1930 ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
1931 ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
1932 }
1933#endif
1934
Jim Cownie5e8470a2013-09-27 10:38:44 +00001935#if USE_ITT_BUILD
1936 __kmp_itt_lock_creating( lck );
1937#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001938
1939#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001940} // __kmpc_init_lock
1941
1942/* initialize the lock */
1943void
1944__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001945#if KMP_USE_DYNAMIC_LOCK
1946
1947 KMP_DEBUG_ASSERT(__kmp_init_serial);
1948 if (__kmp_env_consistency_check && user_lock == NULL) {
1949 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1950 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001951 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001952
1953#else // KMP_USE_DYNAMIC_LOCK
1954
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 static char const * const func = "omp_init_nest_lock";
1956 kmp_user_lock_p lck;
1957 KMP_DEBUG_ASSERT( __kmp_init_serial );
1958
1959 if ( __kmp_env_consistency_check ) {
1960 if ( user_lock == NULL ) {
1961 KMP_FATAL( LockIsUninitialized, func );
1962 }
1963 }
1964
1965 KMP_CHECK_USER_LOCK_INIT();
1966
1967 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1968 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1969 lck = (kmp_user_lock_p)user_lock;
1970 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001971#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001972 else if ( ( __kmp_user_lock_kind == lk_futex )
1973 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1974 <= OMP_NEST_LOCK_T_SIZE ) ) {
1975 lck = (kmp_user_lock_p)user_lock;
1976 }
1977#endif
1978 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001979 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001980 }
1981
1982 INIT_NESTED_LOCK( lck );
1983 __kmp_set_user_lock_location( lck, loc );
1984
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001985#if OMPT_SUPPORT && OMPT_TRACE
1986 if (ompt_enabled &&
1987 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
1988 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
1989 }
1990#endif
1991
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992#if USE_ITT_BUILD
1993 __kmp_itt_lock_creating( lck );
1994#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001995
1996#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997} // __kmpc_init_nest_lock
1998
1999void
2000__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002001#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002003# if USE_ITT_BUILD
2004 kmp_user_lock_p lck;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002005 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2006 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002007 } else {
2008 lck = (kmp_user_lock_p)user_lock;
2009 }
2010 __kmp_itt_lock_destroyed(lck);
2011# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002012 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002013#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002014 kmp_user_lock_p lck;
2015
2016 if ( ( __kmp_user_lock_kind == lk_tas )
2017 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2018 lck = (kmp_user_lock_p)user_lock;
2019 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002020#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021 else if ( ( __kmp_user_lock_kind == lk_futex )
2022 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2023 lck = (kmp_user_lock_p)user_lock;
2024 }
2025#endif
2026 else {
2027 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
2028 }
2029
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002030#if OMPT_SUPPORT && OMPT_TRACE
2031 if (ompt_enabled &&
2032 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
2033 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
2034 }
2035#endif
2036
Jim Cownie5e8470a2013-09-27 10:38:44 +00002037#if USE_ITT_BUILD
2038 __kmp_itt_lock_destroyed( lck );
2039#endif /* USE_ITT_BUILD */
2040 DESTROY_LOCK( lck );
2041
2042 if ( ( __kmp_user_lock_kind == lk_tas )
2043 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2044 ;
2045 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002046#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002047 else if ( ( __kmp_user_lock_kind == lk_futex )
2048 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2049 ;
2050 }
2051#endif
2052 else {
2053 __kmp_user_lock_free( user_lock, gtid, lck );
2054 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002055#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056} // __kmpc_destroy_lock
2057
2058/* destroy the lock */
2059void
2060__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002061#if KMP_USE_DYNAMIC_LOCK
2062
2063# if USE_ITT_BUILD
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002064 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002065 __kmp_itt_lock_destroyed(ilk->lock);
2066# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002067 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002068
2069#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002070
2071 kmp_user_lock_p lck;
2072
2073 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2074 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2075 lck = (kmp_user_lock_p)user_lock;
2076 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002077#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078 else if ( ( __kmp_user_lock_kind == lk_futex )
2079 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2080 <= OMP_NEST_LOCK_T_SIZE ) ) {
2081 lck = (kmp_user_lock_p)user_lock;
2082 }
2083#endif
2084 else {
2085 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
2086 }
2087
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002088#if OMPT_SUPPORT && OMPT_TRACE
2089 if (ompt_enabled &&
2090 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
2091 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
2092 }
2093#endif
2094
Jim Cownie5e8470a2013-09-27 10:38:44 +00002095#if USE_ITT_BUILD
2096 __kmp_itt_lock_destroyed( lck );
2097#endif /* USE_ITT_BUILD */
2098
2099 DESTROY_NESTED_LOCK( lck );
2100
2101 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2102 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2103 ;
2104 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002105#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002106 else if ( ( __kmp_user_lock_kind == lk_futex )
2107 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2108 <= OMP_NEST_LOCK_T_SIZE ) ) {
2109 ;
2110 }
2111#endif
2112 else {
2113 __kmp_user_lock_free( user_lock, gtid, lck );
2114 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002115#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002116} // __kmpc_destroy_nest_lock
2117
2118void
2119__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002120 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002121#if KMP_USE_DYNAMIC_LOCK
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002122 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002123# if USE_ITT_BUILD
2124 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
2125# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002126# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002127 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002128 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002129 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002130# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002131 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002132 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002133 } else
2134# endif
2135 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002136 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002137 }
2138# if USE_ITT_BUILD
2139 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2140# endif
2141
2142#else // KMP_USE_DYNAMIC_LOCK
2143
Jim Cownie5e8470a2013-09-27 10:38:44 +00002144 kmp_user_lock_p lck;
2145
2146 if ( ( __kmp_user_lock_kind == lk_tas )
2147 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2148 lck = (kmp_user_lock_p)user_lock;
2149 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002150#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002151 else if ( ( __kmp_user_lock_kind == lk_futex )
2152 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2153 lck = (kmp_user_lock_p)user_lock;
2154 }
2155#endif
2156 else {
2157 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
2158 }
2159
2160#if USE_ITT_BUILD
2161 __kmp_itt_lock_acquiring( lck );
2162#endif /* USE_ITT_BUILD */
2163
2164 ACQUIRE_LOCK( lck, gtid );
2165
2166#if USE_ITT_BUILD
2167 __kmp_itt_lock_acquired( lck );
2168#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002169
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002170#if OMPT_SUPPORT && OMPT_TRACE
2171 if (ompt_enabled &&
2172 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
2173 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
2174 }
2175#endif
2176
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002177#endif // KMP_USE_DYNAMIC_LOCK
2178}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002179
2180void
2181__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002182#if KMP_USE_DYNAMIC_LOCK
2183
2184# if USE_ITT_BUILD
2185 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2186# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002187 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002188# if USE_ITT_BUILD
2189 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2190#endif
2191
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002192#if OMPT_SUPPORT && OMPT_TRACE
2193 if (ompt_enabled) {
2194 // missing support here: need to know whether acquired first or not
2195 }
2196#endif
2197
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002198#else // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002199 int acquire_status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002200 kmp_user_lock_p lck;
2201
2202 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2203 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2204 lck = (kmp_user_lock_p)user_lock;
2205 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002206#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002207 else if ( ( __kmp_user_lock_kind == lk_futex )
2208 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2209 <= OMP_NEST_LOCK_T_SIZE ) ) {
2210 lck = (kmp_user_lock_p)user_lock;
2211 }
2212#endif
2213 else {
2214 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2215 }
2216
2217#if USE_ITT_BUILD
2218 __kmp_itt_lock_acquiring( lck );
2219#endif /* USE_ITT_BUILD */
2220
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002221 ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002222
2223#if USE_ITT_BUILD
2224 __kmp_itt_lock_acquired( lck );
2225#endif /* USE_ITT_BUILD */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002226
2227#if OMPT_SUPPORT && OMPT_TRACE
2228 if (ompt_enabled) {
2229 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2230 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
2231 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
2232 } else {
2233 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
2234 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
2235 }
2236 }
2237#endif
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002238
2239#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002240}
2241
2242void
2243__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2244{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002245#if KMP_USE_DYNAMIC_LOCK
2246
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002247 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002248# if USE_ITT_BUILD
2249 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2250# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002251# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002252 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002253 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002254 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002255# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002256 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002257 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002258 } else
2259# endif
2260 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002261 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002262 }
2263
2264#else // KMP_USE_DYNAMIC_LOCK
2265
Jim Cownie5e8470a2013-09-27 10:38:44 +00002266 kmp_user_lock_p lck;
2267
2268 /* Can't use serial interval since not block structured */
2269 /* release the lock */
2270
2271 if ( ( __kmp_user_lock_kind == lk_tas )
2272 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002273#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002274 // "fast" path implemented to fix customer performance issue
2275#if USE_ITT_BUILD
2276 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2277#endif /* USE_ITT_BUILD */
2278 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2279 KMP_MB();
2280 return;
2281#else
2282 lck = (kmp_user_lock_p)user_lock;
2283#endif
2284 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002285#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002286 else if ( ( __kmp_user_lock_kind == lk_futex )
2287 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2288 lck = (kmp_user_lock_p)user_lock;
2289 }
2290#endif
2291 else {
2292 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2293 }
2294
2295#if USE_ITT_BUILD
2296 __kmp_itt_lock_releasing( lck );
2297#endif /* USE_ITT_BUILD */
2298
2299 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002300
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002301#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002302 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002303 ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2304 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2305 }
2306#endif
2307
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002308#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002309}
2310
2311/* release the lock */
2312void
2313__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2314{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002315#if KMP_USE_DYNAMIC_LOCK
2316
2317# if USE_ITT_BUILD
2318 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2319# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002320 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002321
2322#else // KMP_USE_DYNAMIC_LOCK
2323
Jim Cownie5e8470a2013-09-27 10:38:44 +00002324 kmp_user_lock_p lck;
2325
2326 /* Can't use serial interval since not block structured */
2327
2328 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2329 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002330#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002331 // "fast" path implemented to fix customer performance issue
2332 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2333#if USE_ITT_BUILD
2334 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2335#endif /* USE_ITT_BUILD */
2336 if ( --(tl->lk.depth_locked) == 0 ) {
2337 TCW_4(tl->lk.poll, 0);
2338 }
2339 KMP_MB();
2340 return;
2341#else
2342 lck = (kmp_user_lock_p)user_lock;
2343#endif
2344 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002345#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002346 else if ( ( __kmp_user_lock_kind == lk_futex )
2347 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2348 <= OMP_NEST_LOCK_T_SIZE ) ) {
2349 lck = (kmp_user_lock_p)user_lock;
2350 }
2351#endif
2352 else {
2353 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2354 }
2355
2356#if USE_ITT_BUILD
2357 __kmp_itt_lock_releasing( lck );
2358#endif /* USE_ITT_BUILD */
2359
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002360 int release_status;
2361 release_status = RELEASE_NESTED_LOCK( lck, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002362#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002363 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002364 if (release_status == KMP_LOCK_RELEASED) {
2365 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2366 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2367 (uint64_t) lck);
2368 }
2369 } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2370 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2371 (uint64_t) lck);
2372 }
2373 }
2374#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002375
2376#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377}
2378
2379/* try to acquire the lock */
2380int
2381__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2382{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002383 KMP_COUNT_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002384
2385#if KMP_USE_DYNAMIC_LOCK
2386 int rc;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002387 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002388# if USE_ITT_BUILD
Jonathan Peyton81f9cd12015-05-22 22:37:22 +00002389 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002390# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002391# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002392 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002393 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002394 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002395# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002396 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002397 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002398 } else
2399# endif
2400 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002401 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002402 }
2403 if (rc) {
2404# if USE_ITT_BUILD
2405 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2406# endif
2407 return FTN_TRUE;
2408 } else {
2409# if USE_ITT_BUILD
2410 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2411# endif
2412 return FTN_FALSE;
2413 }
2414
2415#else // KMP_USE_DYNAMIC_LOCK
2416
Jim Cownie5e8470a2013-09-27 10:38:44 +00002417 kmp_user_lock_p lck;
2418 int rc;
2419
2420 if ( ( __kmp_user_lock_kind == lk_tas )
2421 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2422 lck = (kmp_user_lock_p)user_lock;
2423 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002424#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002425 else if ( ( __kmp_user_lock_kind == lk_futex )
2426 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2427 lck = (kmp_user_lock_p)user_lock;
2428 }
2429#endif
2430 else {
2431 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2432 }
2433
2434#if USE_ITT_BUILD
2435 __kmp_itt_lock_acquiring( lck );
2436#endif /* USE_ITT_BUILD */
2437
2438 rc = TEST_LOCK( lck, gtid );
2439#if USE_ITT_BUILD
2440 if ( rc ) {
2441 __kmp_itt_lock_acquired( lck );
2442 } else {
2443 __kmp_itt_lock_cancelled( lck );
2444 }
2445#endif /* USE_ITT_BUILD */
2446 return ( rc ? FTN_TRUE : FTN_FALSE );
2447
2448 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002449
2450#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002451}
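
/* A typical user-level pattern that reaches __kmpc_test_lock via omp_test_lock
   (a sketch; do_other_work is a hypothetical helper):

       while ( ! omp_test_lock( &l ) ) {
           do_other_work();
       }
       // ... guarded work ...
       omp_unset_lock( &l );
*/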
2452
2453/* try to acquire the lock */
2454int
2455__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2456{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002457#if KMP_USE_DYNAMIC_LOCK
2458 int rc;
2459# if USE_ITT_BUILD
2460 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2461# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002462 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002463# if USE_ITT_BUILD
2464 if (rc) {
2465 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2466 } else {
2467 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2468 }
2469# endif
2470 return rc;
2471
2472#else // KMP_USE_DYNAMIC_LOCK
2473
Jim Cownie5e8470a2013-09-27 10:38:44 +00002474 kmp_user_lock_p lck;
2475 int rc;
2476
2477 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2478 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2479 lck = (kmp_user_lock_p)user_lock;
2480 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002481#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002482 else if ( ( __kmp_user_lock_kind == lk_futex )
2483 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2484 <= OMP_NEST_LOCK_T_SIZE ) ) {
2485 lck = (kmp_user_lock_p)user_lock;
2486 }
2487#endif
2488 else {
2489 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2490 }
2491
2492#if USE_ITT_BUILD
2493 __kmp_itt_lock_acquiring( lck );
2494#endif /* USE_ITT_BUILD */
2495
2496 rc = TEST_NESTED_LOCK( lck, gtid );
2497#if USE_ITT_BUILD
2498 if ( rc ) {
2499 __kmp_itt_lock_acquired( lck );
2500 } else {
2501 __kmp_itt_lock_cancelled( lck );
2502 }
2503#endif /* USE_ITT_BUILD */
2504 return rc;
2505
2506 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002507
2508#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002509}
2510
2511
2512/*--------------------------------------------------------------------------------------------------------------------*/
2513
2514/*
2515 * Interface to fast scalable reduce methods routines
2516 */
2517
2518// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions;
2519// another solution: to re-determine the method one more time in __kmpc_end_reduce* functions (new prototype required then)
2520// AT: which solution is better?
2521#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2522 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2523
2524#define __KMP_GET_REDUCTION_METHOD(gtid) \
2525 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2526
2527// description of the packed_reduction_method variable: look at the macros in kmp.h
2528
2529
2530// used in a critical section reduce block
2531static __forceinline void
2532__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2533
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002534 // this lock was visible to a customer and to the threading profile tool as a serial overhead span
Jim Cownie5e8470a2013-09-27 10:38:44 +00002535 // (although it's used for an internal purpose only)
2536 // why was it visible in previous implementation?
2537 // should we keep it visible in new reduce block?
2538 kmp_user_lock_p lck;
2539
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002540#if KMP_USE_DYNAMIC_LOCK
2541
Jonathan Peytondae13d82015-12-11 21:57:06 +00002542 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
2543 // Check if it is initialized.
2544 if (*lk == 0) {
2545 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
2546 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
2547 } else {
2548 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002549 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002550 }
2551 // Branch for accessing the actual lock object and set operation. This branching is inevitable since
2552 // this lock initialization does not follow the normal dispatch path (lock table is not used).
2553 if (KMP_EXTRACT_D_TAG(lk) != 0) {
2554 lck = (kmp_user_lock_p)lk;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002555 KMP_DEBUG_ASSERT(lck != NULL);
2556 if (__kmp_env_consistency_check) {
2557 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2558 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002559 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002560 } else {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002561 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
2562 lck = ilk->lock;
2563 KMP_DEBUG_ASSERT(lck != NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002564 if (__kmp_env_consistency_check) {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002565 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002566 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002567 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002568 }
2569
2570#else // KMP_USE_DYNAMIC_LOCK
2571
Jim Cownie5e8470a2013-09-27 10:38:44 +00002572 // We know that the fast reduction code is only emitted by Intel compilers
2573 // with 32 byte critical sections. If there isn't enough space, then we
2574 // have to use a pointer.
2575 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2576 lck = (kmp_user_lock_p)crit;
2577 }
2578 else {
2579 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2580 }
2581 KMP_DEBUG_ASSERT( lck != NULL );
2582
2583 if ( __kmp_env_consistency_check )
2584 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2585
2586 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002587
2588#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002589}
2590
2591// used in a critical section reduce block
2592static __forceinline void
2593__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2594
2595 kmp_user_lock_p lck;
2596
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002597#if KMP_USE_DYNAMIC_LOCK
2598
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002599 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002600 lck = (kmp_user_lock_p)crit;
2601 if (__kmp_env_consistency_check)
2602 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002603 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002604 } else {
2605 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2606 if (__kmp_env_consistency_check)
2607 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002608 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002609 }
2610
2611#else // KMP_USE_DYNAMIC_LOCK
2612
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2614 // sections. If there isn't enough space, then we have to use a pointer.
2615 if ( __kmp_base_user_lock_size > 32 ) {
2616 lck = *( (kmp_user_lock_p *) crit );
2617 KMP_ASSERT( lck != NULL );
2618 } else {
2619 lck = (kmp_user_lock_p) crit;
2620 }
2621
2622 if ( __kmp_env_consistency_check )
2623 __kmp_pop_sync( global_tid, ct_critical, loc );
2624
2625 __kmp_release_user_lock_with_checks( lck, global_tid );
2626
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002627#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002628} // __kmp_end_critical_section_reduce_block
2629
2630
2631/* 2.a.i. Reduce Block without a terminating barrier */
2632/*!
2633@ingroup SYNCHRONIZATION
2634@param loc source location information
2635@param global_tid global thread number
2636@param num_vars number of items (variables) to be reduced
2637@param reduce_size size of data in bytes to be reduced
2638@param reduce_data pointer to data to be reduced
2639@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2640@param lck pointer to the unique lock data structure
2641@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2642
2643The nowait version is used for a reduce clause with the nowait argument.
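
A sketch of how generated code typically dispatches on the return value
(illustrative placeholders throughout; a real atomic case would use the
__kmpc_atomic_* entry point matching the reduction type):
@code
switch ( __kmpc_reduce_nowait( &loc, gtid, 1, sizeof(sum), &sum,
                               reduce_func, &crit ) ) {
case 1: // combine the partial value, then close the reduce block
    global_sum += sum;
    __kmpc_end_reduce_nowait( &loc, gtid, &crit );
    break;
case 2: // atomic reduction: each thread updates the result atomically
    __kmpc_atomic_fixed4_add( &loc, gtid, &global_sum, sum );
    break;
default: // 0: this thread contributes nothing further
    break;
}
@endcode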
2644*/
2645kmp_int32
2646__kmpc_reduce_nowait(
2647 ident_t *loc, kmp_int32 global_tid,
2648 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2649 kmp_critical_name *lck ) {
2650
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002651 KMP_COUNT_BLOCK(REDUCE_nowait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002652 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002653 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002654#if OMP_40_ENABLED
2655 kmp_team_t *team;
2656 kmp_info_t *th;
2657 int teams_swapped = 0, task_state;
2658#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002659 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2660
2661 // why do we need this initialization here at all?
2662 // A reduction clause cannot be used as a stand-alone directive.
2663
2664 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2665 // possible detection of false-positive race by the threadchecker ???
2666 if( ! TCR_4( __kmp_init_parallel ) )
2667 __kmp_parallel_initialize();
2668
2669 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002670#if KMP_USE_DYNAMIC_LOCK
2671 if ( __kmp_env_consistency_check )
2672 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2673#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002674 if ( __kmp_env_consistency_check )
2675 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002676#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002677
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002678#if OMP_40_ENABLED
2679 th = __kmp_thread_from_gtid(global_tid);
2680 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2681 team = th->th.th_team;
2682 if( team->t.t_level == th->th.th_teams_level ) {
2683 // this is reduction at teams construct
2684 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2685 // Let's swap teams temporarily for the reduction barrier
2686 teams_swapped = 1;
2687 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2688 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002689 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002690 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002691 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002692 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002693 }
2694 }
2695#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002696
2697 // the packed_reduction_method value will be reused by the __kmpc_end_reduce* functions; the value should be kept in a variable
2698 // the variable should be either a construct-specific or thread-specific property, not a team specific property
2699 // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct)
2700 // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
2701 // (if both construct-specific and team-specific variables were shared, then unnecessary extra syncs would be needed)
2702 // a thread-specific variable is better regarding two issues above (next construct and extra syncs)
2703 // a thread-specific "th_local.reduction_method" variable is used currently
2704 // each thread executes the 'determine' and 'set' lines (no need for a single thread to execute them, avoiding unnecessary extra syncs)
2705
2706 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2707 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2708
2709 if( packed_reduction_method == critical_reduce_block ) {
2710
2711 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2712 retval = 1;
2713
2714 } else if( packed_reduction_method == empty_reduce_block ) {
2715
2716 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2717 retval = 1;
2718
2719 } else if( packed_reduction_method == atomic_reduce_block ) {
2720
2721 retval = 2;
2722
2723 // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2724 // (this is not ideal, because the checking block has been closed by this 'pop',
2725 // but the atomic operation has not executed yet; it runs slightly later, literally on the next instruction)
2726 if ( __kmp_env_consistency_check )
2727 __kmp_pop_sync( global_tid, ct_reduce, loc );
2728
2729 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2730
2731 //AT: performance issue: a real barrier here
2732 //AT: (if master goes slow, other threads are blocked here waiting for the master to come and release them)
2733 //AT: (it's not what a customer might expect specifying NOWAIT clause)
2734 //AT: (specifying NOWAIT won't result in improvement of performance, it'll be confusing to a customer)
2735 //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
2736 // and be more in line with sense of NOWAIT
2737 //AT: TO DO: do epcc test and compare times
2738
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002739 // this barrier should be invisible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002740 // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002741#if USE_ITT_NOTIFY
2742 __kmp_threads[global_tid]->th.th_ident = loc;
2743#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002744 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2745 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2746
2747 // all workers except the master should do this pop here
2748 // (no other worker will reach __kmpc_end_reduce_nowait())
2749 if ( __kmp_env_consistency_check ) {
2750 if( retval == 0 ) {
2751 __kmp_pop_sync( global_tid, ct_reduce, loc );
2752 }
2753 }
2754
2755 } else {
2756
2757 // should never reach this block
2758 KMP_ASSERT( 0 ); // "unexpected method"
2759
2760 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002761#if OMP_40_ENABLED
2762 if( teams_swapped ) {
2763 // Restore thread structure
2764 th->th.th_info.ds.ds_tid = 0;
2765 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002766 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002767 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002768 th->th.th_task_state = task_state;
2769 }
2770#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002771 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2772
2773 return retval;
2774}
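
// Illustrative sketch, not part of the runtime: roughly how a compiler might
// lower "#pragma omp for reduction(+:sum) nowait" onto the entry points above.
// The names loc_r, gtid, sum, global_sum, add_func and crit_lock are
// hypothetical; __kmpc_atomic_fixed4_add is one of the runtime's atomic
// entry points (see kmp_atomic.h).
//
//    static void add_func( void * lhs, void * rhs ) {
//        *(kmp_int32 *)lhs += *(kmp_int32 *)rhs;  // combine two partial sums
//    }
//    ...
//    kmp_int32 ret = __kmpc_reduce_nowait( &loc_r, gtid, 1, sizeof(kmp_int32),
//                                          &sum, add_func, &crit_lock );
//    if( ret == 1 ) {
//        // this thread holds the combined value (or won the critical section)
//        global_sum += sum;
//        __kmpc_end_reduce_nowait( &loc_r, gtid, &crit_lock );
//    } else if( ret == 2 ) {
//        // atomic method: each thread adds its partial value atomically
//        __kmpc_atomic_fixed4_add( &loc_r, gtid, &global_sum, sum );
//    }
//    // ret == 0: a tree-reduced worker; its value was already consumed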
2775
2776/*!
2777@ingroup SYNCHRONIZATION
2778@param loc source location information
2779@param global_tid global thread id.
2780@param lck pointer to the unique lock data structure
2781
2782Finish the execution of a reduce nowait.
2783*/
2784void
2785__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2786
2787 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2788
2789 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2790
2791 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2792
2793 if( packed_reduction_method == critical_reduce_block ) {
2794
2795 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2796
2797 } else if( packed_reduction_method == empty_reduce_block ) {
2798
2799 // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2800
2801 } else if( packed_reduction_method == atomic_reduce_block ) {
2802
2803 // neither the master nor the other workers should get here
2804 // (code gen does not generate this call in case 2: atomic reduce block)
2805 // actually it would be better to remove this else-if entirely;
2806 // after removal the value would be caught by the 'else' branch and trigger the assert
2807
2808 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2809
2810 // only master gets here
2811
2812 } else {
2813
2814 // should never reach this block
2815 KMP_ASSERT( 0 ); // "unexpected method"
2816
2817 }
2818
2819 if ( __kmp_env_consistency_check )
2820 __kmp_pop_sync( global_tid, ct_reduce, loc );
2821
2822 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2823
2824 return;
2825}
2826
2827/* 2.a.ii. Reduce Block with a terminating barrier */
2828
2829/*!
2830@ingroup SYNCHRONIZATION
2831@param loc source location information
2832@param global_tid global thread number
2833@param num_vars number of items (variables) to be reduced
2834@param reduce_size size of data in bytes to be reduced
2835@param reduce_data pointer to data to be reduced
2836@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2837@param lck pointer to the unique lock data structure
2838@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2839
2840A blocking reduce that includes an implicit barrier.
2841*/
2842kmp_int32
2843__kmpc_reduce(
2844 ident_t *loc, kmp_int32 global_tid,
2845 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2846 void (*reduce_func)(void *lhs_data, void *rhs_data),
2847 kmp_critical_name *lck )
2848{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002849 KMP_COUNT_BLOCK(REDUCE_wait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002850 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002851 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2852
2853 KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2854
2855 // why do we need this initialization here at all?
2856 // A reduction clause cannot be a stand-alone directive.
2857
2858 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2859 // possible false-positive race report from the thread checker ???
2860 if( ! TCR_4( __kmp_init_parallel ) )
2861 __kmp_parallel_initialize();
2862
2863 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002864#if KMP_USE_DYNAMIC_LOCK
2865 if ( __kmp_env_consistency_check )
2866 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2867#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002868 if ( __kmp_env_consistency_check )
2869 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002870#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002871
Jim Cownie5e8470a2013-09-27 10:38:44 +00002872 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2873 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2874
2875 if( packed_reduction_method == critical_reduce_block ) {
2876
2877 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2878 retval = 1;
2879
2880 } else if( packed_reduction_method == empty_reduce_block ) {
2881
2882 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2883 retval = 1;
2884
2885 } else if( packed_reduction_method == atomic_reduce_block ) {
2886
2887 retval = 2;
2888
2889 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2890
2891 //case tree_reduce_block:
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002892 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002893 // (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002894#if USE_ITT_NOTIFY
2895 __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2896#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002897 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2898 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2899
2900 // all workers except the master should do this pop here
2901 // (no worker other than the master will enter __kmpc_end_reduce())
2902 if ( __kmp_env_consistency_check ) {
2903 if( retval == 0 ) { // 0: all other workers; 1: master
2904 __kmp_pop_sync( global_tid, ct_reduce, loc );
2905 }
2906 }
2907
2908 } else {
2909
2910 // should never reach this block
2911 KMP_ASSERT( 0 ); // "unexpected method"
2912
2913 }
2914
2915 KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2916
2917 return retval;
2918}
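
// Illustrative pairing for the blocking form, mirroring the nowait sketch
// after __kmpc_reduce_nowait() above (same hypothetical names): threads that
// receive 1 or 2 must call __kmpc_end_reduce(), which supplies the
// terminating barrier of the construct.
//
//    kmp_int32 ret = __kmpc_reduce( &loc_r, gtid, 1, sizeof(kmp_int32),
//                                   &sum, add_func, &crit_lock );
//    if( ret == 1 ) {
//        global_sum += sum;
//        __kmpc_end_reduce( &loc_r, gtid, &crit_lock );
//    } else if( ret == 2 ) {
//        __kmpc_atomic_fixed4_add( &loc_r, gtid, &global_sum, sum );
//        __kmpc_end_reduce( &loc_r, gtid, &crit_lock );
//    }
//    // ret == 0: a tree-reduced worker, released by the master's end_reduce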
2919
2920/*!
2921@ingroup SYNCHRONIZATION
2922@param loc source location information
2923@param global_tid global thread id.
2924@param lck pointer to the unique lock data structure
2925
2926Finish the execution of a blocking reduce.
2927The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
2928*/
2929void
2930__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2931
2932 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2933
2934 KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2935
2936 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2937
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002938 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002939 // (it's a terminating barrier on constructs if NOWAIT not specified)
2940
2941 if( packed_reduction_method == critical_reduce_block ) {
2942
2943 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2944
2945 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002946#if USE_ITT_NOTIFY
2947 __kmp_threads[global_tid]->th.th_ident = loc;
2948#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002949 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2950
2951 } else if( packed_reduction_method == empty_reduce_block ) {
2952
2953 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2954
2955 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002956#if USE_ITT_NOTIFY
2957 __kmp_threads[global_tid]->th.th_ident = loc;
2958#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002959 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2960
2961 } else if( packed_reduction_method == atomic_reduce_block ) {
2962
2963 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002964#if USE_ITT_NOTIFY
2965 __kmp_threads[global_tid]->th.th_ident = loc;
2966#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002967 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2968
2969 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2970
2971 // only master executes here (master releases all other workers)
2972 __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2973
2974 } else {
2975
2976 // should never reach this block
2977 KMP_ASSERT( 0 ); // "unexpected method"
2978
2979 }
2980
2981 if ( __kmp_env_consistency_check )
2982 __kmp_pop_sync( global_tid, ct_reduce, loc );
2983
2984 KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2985
2986 return;
2987}
2988
2989#undef __KMP_GET_REDUCTION_METHOD
2990#undef __KMP_SET_REDUCTION_METHOD
2991
2992/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
2993
2994kmp_uint64
2995__kmpc_get_taskid() {
2996
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002997 kmp_int32 gtid;
2998 kmp_info_t * thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002999
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003000 gtid = __kmp_get_gtid();
3001 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003002 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003003 } // if
3004 thread = __kmp_thread_from_gtid( gtid );
3005 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003006
3007} // __kmpc_get_taskid
3008
3009
3010kmp_uint64
3011__kmpc_get_parent_taskid() {
3012
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003013 kmp_int32 gtid;
3014 kmp_info_t * thread;
3015 kmp_taskdata_t * parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003016
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003017 gtid = __kmp_get_gtid();
3018 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003019 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003020 } // if
3021 thread = __kmp_thread_from_gtid( gtid );
3022 parent_task = thread->th.th_current_task->td_parent;
3023 return ( parent_task == NULL ? 0 : parent_task->td_task_id );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003024
3025} // __kmpc_get_parent_taskid
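
// A minimal usage sketch (hypothetical instrumentation, not part of the
// library): both entry points return 0 when called outside an OpenMP thread,
// so the call is safe in any context.
//
//    printf( "task %llu (parent %llu)\n",
//            (unsigned long long)__kmpc_get_taskid(),
//            (unsigned long long)__kmpc_get_parent_taskid() );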
3026
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003027void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003028{
Jim Cownie5e8470a2013-09-27 10:38:44 +00003029 if ( ! __kmp_init_serial ) {
3030 __kmp_serial_initialize();
3031 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003032 __kmp_place_num_sockets = nS;
3033 __kmp_place_socket_offset = sO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003034 __kmp_place_num_cores = nC;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003035 __kmp_place_core_offset = cO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003036 __kmp_place_num_threads_per_core = nT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003037}
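
// Illustrative call (values are hypothetical): request 2 sockets starting at
// socket 0, 4 cores per socket starting at core 0, and 2 threads per core --
// the programmatic equivalent of a KMP_PLACE_THREADS-style "2s,4c,2t" request:
//
//    __kmpc_place_threads( 2 /*nS*/, 0 /*sO*/, 4 /*nC*/, 0 /*cO*/, 2 /*nT*/ );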
3038
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003039#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00003040/*!
3041@ingroup WORK_SHARING
3042@param loc source location information.
3043@param gtid global thread number.
3044@param num_dims number of associated doacross loops.
3045@param dims info on loops bounds.
3046
3047Initialize doacross loop information.
3048Expect the compiler to send us inclusive bounds,
3049e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
3050*/
3051void
3052__kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, struct kmp_dim * dims)
3053{
3054 int j, idx;
3055 kmp_int64 last, trace_count;
3056 kmp_info_t *th = __kmp_threads[gtid];
3057 kmp_team_t *team = th->th.th_team;
3058 kmp_uint32 *flags;
3059 kmp_disp_t *pr_buf = th->th.th_dispatch;
3060 dispatch_shared_info_t *sh_buf;
3061
3062 KA_TRACE(20,("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3063 gtid, num_dims, !team->t.t_serialized));
3064 KMP_DEBUG_ASSERT(dims != NULL);
3065 KMP_DEBUG_ASSERT(num_dims > 0);
3066
3067 if( team->t.t_serialized ) {
3068 KA_TRACE(20,("__kmpc_doacross_init() exit: serialized team\n"));
3069 return; // no dependencies if team is serialized
3070 }
3071 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3072 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for the next loop
Jonathan Peyton067325f2016-05-31 19:01:15 +00003073 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003074
3075 // Save bounds info into allocated private buffer
3076 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3077 pr_buf->th_doacross_info =
3078 (kmp_int64*)__kmp_thread_malloc(th, sizeof(kmp_int64)*(4 * num_dims + 1));
3079 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3080 pr_buf->th_doacross_info[0] = (kmp_int64)num_dims; // first element is number of dimensions
3081 // also save the address of num_done so it can be accessed later without knowing the buffer index
3082 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3083 pr_buf->th_doacross_info[2] = dims[0].lo;
3084 pr_buf->th_doacross_info[3] = dims[0].up;
3085 pr_buf->th_doacross_info[4] = dims[0].st;
3086 last = 5;
3087 for( j = 1; j < num_dims; ++j ) {
3088 kmp_int64 range_length; // To keep ranges of all dimensions but the first dims[0]
3089 if( dims[j].st == 1 ) { // most common case
3090 // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3091 range_length = dims[j].up - dims[j].lo + 1;
3092 } else {
3093 if( dims[j].st > 0 ) {
3094 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3095 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3096 } else { // negative increment
3097 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3098 range_length = (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3099 }
3100 }
3101 pr_buf->th_doacross_info[last++] = range_length;
3102 pr_buf->th_doacross_info[last++] = dims[j].lo;
3103 pr_buf->th_doacross_info[last++] = dims[j].up;
3104 pr_buf->th_doacross_info[last++] = dims[j].st;
3105 }
3106
3107 // Compute total trip count.
3108 // Start with range of dims[0] which we don't need to keep in the buffer.
3109 if( dims[0].st == 1 ) { // most common case
3110 trace_count = dims[0].up - dims[0].lo + 1;
3111 } else if( dims[0].st > 0 ) {
3112 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3113 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3114 } else { // negative increment
3115 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3116 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3117 }
3118 for( j = 1; j < num_dims; ++j ) {
3119 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3120 }
3121 KMP_DEBUG_ASSERT(trace_count > 0);
3122
Jonathan Peyton067325f2016-05-31 19:01:15 +00003123 // Check that the shared buffer is not still occupied by another loop (the one that used index idx - __kmp_dispatch_num_buffers)
Jonathan Peyton71909c52016-03-02 22:42:06 +00003124 if( idx != sh_buf->doacross_buf_idx ) {
3125 // Shared buffer is occupied, wait for it to be free
3126 __kmp_wait_yield_4( (kmp_uint32*)&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, NULL );
3127 }
3128 // Check if we are the first thread. After the CAS the first thread gets 0,
3129 // others get 1 if initialization is in progress, allocated pointer otherwise.
3130 flags = (kmp_uint32*)KMP_COMPARE_AND_STORE_RET64(
3131 (kmp_int64*)&sh_buf->doacross_flags,NULL,(kmp_int64)1);
3132 if( flags == NULL ) {
3133 // we are the first thread, allocate the array of flags
3134 kmp_int64 size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3135 sh_buf->doacross_flags = (kmp_uint32*)__kmp_thread_calloc(th, size, 1);
3136 } else if( (kmp_int64)flags == 1 ) {
3137 // initialization is still in progress, need to wait
3138 while( (volatile kmp_int64)sh_buf->doacross_flags == 1 ) {
3139 KMP_YIELD(TRUE);
3140 }
3141 }
3142 KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > 1); // check value of pointer
3143 pr_buf->th_doacross_flags = sh_buf->doacross_flags; // save private copy in order to not
3144 // touch shared buffer on each iteration
3145 KA_TRACE(20,("__kmpc_doacross_init() exit: T#%d\n", gtid));
3146}
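
// Illustrative lowering (hypothetical loop and names; assumes the lo/up/st
// field order used above): for a doacross loop such as
//
//    #pragma omp for ordered(1)
//    for( i = 2; i < 9; i += 2 ) {
//        #pragma omp ordered depend(sink: i-2)
//        ... consume the result of iteration i-2 ...
//        #pragma omp ordered depend(source)
//    }
//
// a compiler might emit, in each thread:
//
//    struct kmp_dim dim = { 2 /*lo*/, 8 /*up*/, 2 /*st*/ }; // inclusive bounds
//    __kmpc_doacross_init( &loc_d, gtid, 1, &dim );
//    for each iteration i assigned to this thread {
//        long long sink = i - 2;
//        __kmpc_doacross_wait( &loc_d, gtid, &sink ); // no-op when i-2 < lo
//        ... loop body ...
//        long long src = i;
//        __kmpc_doacross_post( &loc_d, gtid, &src );
//    }
//    __kmpc_doacross_fini( &loc_d, gtid );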
3147
3148void
3149__kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec)
3150{
3151 kmp_int32 shft, num_dims, i;
3152 kmp_uint32 flag;
3153 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3154 kmp_info_t *th = __kmp_threads[gtid];
3155 kmp_team_t *team = th->th.th_team;
3156 kmp_disp_t *pr_buf;
3157 kmp_int64 lo, up, st;
3158
3159 KA_TRACE(20,("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3160 if( team->t.t_serialized ) {
3161 KA_TRACE(20,("__kmpc_doacross_wait() exit: serialized team\n"));
3162 return; // no dependencies if team is serialized
3163 }
3164
3165 // calculate sequential iteration number and check out-of-bounds condition
3166 pr_buf = th->th.th_dispatch;
3167 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3168 num_dims = pr_buf->th_doacross_info[0];
3169 lo = pr_buf->th_doacross_info[2];
3170 up = pr_buf->th_doacross_info[3];
3171 st = pr_buf->th_doacross_info[4];
3172 if( st == 1 ) { // most common case
3173 if( vec[0] < lo || vec[0] > up ) {
3174 KA_TRACE(20,(
3175 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3176 gtid, vec[0], lo, up));
3177 return;
3178 }
3179 iter_number = vec[0] - lo;
3180 } else if( st > 0 ) {
3181 if( vec[0] < lo || vec[0] > up ) {
3182 KA_TRACE(20,(
3183 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3184 gtid, vec[0], lo, up));
3185 return;
3186 }
3187 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3188 } else { // negative increment
3189 if( vec[0] > lo || vec[0] < up ) {
3190 KA_TRACE(20,(
3191 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3192 gtid, vec[0], lo, up));
3193 return;
3194 }
3195 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3196 }
3197 for( i = 1; i < num_dims; ++i ) {
3198 kmp_int64 iter, ln;
3199 kmp_int32 j = i * 4;
3200 ln = pr_buf->th_doacross_info[j + 1];
3201 lo = pr_buf->th_doacross_info[j + 2];
3202 up = pr_buf->th_doacross_info[j + 3];
3203 st = pr_buf->th_doacross_info[j + 4];
3204 if( st == 1 ) {
3205 if( vec[i] < lo || vec[i] > up ) {
3206 KA_TRACE(20,(
3207 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3208 gtid, vec[i], lo, up));
3209 return;
3210 }
3211 iter = vec[i] - lo;
3212 } else if( st > 0 ) {
3213 if( vec[i] < lo || vec[i] > up ) {
3214 KA_TRACE(20,(
3215 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3216 gtid, vec[i], lo, up));
3217 return;
3218 }
3219 iter = (kmp_uint64)(vec[i] - lo) / st;
3220 } else { // st < 0
3221 if( vec[i] > lo || vec[i] < up ) {
3222 KA_TRACE(20,(
3223 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3224 gtid, vec[i], lo, up));
3225 return;
3226 }
3227 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3228 }
3229 iter_number = iter + ln * iter_number;
3230 }
3231 shft = iter_number % 32; // use 32-bit granularity
3232 iter_number >>= 5; // divided by 32
3233 flag = 1 << shft;
3234 while( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 ) {
3235 KMP_YIELD(TRUE);
3236 }
3237 KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
3238 gtid, (iter_number<<5)+shft));
3239}
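
// Worked example of the linearization above (hypothetical 2-D nest):
// dims[0] has lo=0, up=3, st=1 and dims[1] has lo=0, up=9, st=1, so the kept
// range_length ln of dimension 1 is 10. For vec = {2, 7}:
//
//    iter_number = 2;            // dimension 0: (2 - 0) / 1
//    iter_number = 7 + 10 * 2;   // dimension 1: iter + ln * iter_number = 27
//    shft = 27 % 32;             // bit 27 ...
//    iter_number >>= 5;          // ... of flag word 0
//    flag = 1 << 27;
//
// so the thread spins until the matching __kmpc_doacross_post() sets bit 27
// of th_doacross_flags[0].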
3240
3241void
3242__kmpc_doacross_post(ident_t *loc, int gtid, long long *vec)
3243{
3244 kmp_int32 shft, num_dims, i;
3245 kmp_uint32 flag;
3246 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3247 kmp_info_t *th = __kmp_threads[gtid];
3248 kmp_team_t *team = th->th.th_team;
3249 kmp_disp_t *pr_buf;
3250 kmp_int64 lo, st;
3251
3252 KA_TRACE(20,("__kmpc_doacross_post() enter: called T#%d\n", gtid));
3253 if( team->t.t_serialized ) {
3254 KA_TRACE(20,("__kmpc_doacross_post() exit: serialized team\n"));
3255 return; // no dependencies if team is serialized
3256 }
3257
3258 // calculate sequential iteration number (same as in "wait" but no out-of-bounds checks)
3259 pr_buf = th->th.th_dispatch;
3260 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3261 num_dims = pr_buf->th_doacross_info[0];
3262 lo = pr_buf->th_doacross_info[2];
3263 st = pr_buf->th_doacross_info[4];
3264 if( st == 1 ) { // most common case
3265 iter_number = vec[0] - lo;
3266 } else if( st > 0 ) {
3267 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3268 } else { // negative increment
3269 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3270 }
3271 for( i = 1; i < num_dims; ++i ) {
3272 kmp_int64 iter, ln;
3273 kmp_int32 j = i * 4;
3274 ln = pr_buf->th_doacross_info[j + 1];
3275 lo = pr_buf->th_doacross_info[j + 2];
3276 st = pr_buf->th_doacross_info[j + 4];
3277 if( st == 1 ) {
3278 iter = vec[i] - lo;
3279 } else if( st > 0 ) {
3280 iter = (kmp_uint64)(vec[i] - lo) / st;
3281 } else { // st < 0
3282 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3283 }
3284 iter_number = iter + ln * iter_number;
3285 }
3286 shft = iter_number % 32; // use 32-bit granularity
3287 iter_number >>= 5; // divided by 32
3288 flag = 1 << shft;
3289 if( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 )
3290 KMP_TEST_THEN_OR32( (kmp_int32*)&pr_buf->th_doacross_flags[iter_number], (kmp_int32)flag );
3291 KA_TRACE(20,("__kmpc_doacross_post() exit: T#%d iter %lld posted\n",
3292 gtid, (iter_number<<5)+shft));
3293}
3294
3295void
3296__kmpc_doacross_fini(ident_t *loc, int gtid)
3297{
3298 kmp_int64 num_done;
3299 kmp_info_t *th = __kmp_threads[gtid];
3300 kmp_team_t *team = th->th.th_team;
3301 kmp_disp_t *pr_buf = th->th.th_dispatch;
3302
3303 KA_TRACE(20,("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
3304 if( team->t.t_serialized ) {
3305 KA_TRACE(20,("__kmpc_doacross_fini() exit: serialized team %p\n", team));
3306 return; // nothing to do
3307 }
3308 num_done = KMP_TEST_THEN_INC64((kmp_int64*)pr_buf->th_doacross_info[1]) + 1;
3309 if( num_done == th->th.th_team_nproc ) {
3310 // we are the last thread, need to free shared resources
3311 int idx = pr_buf->th_doacross_buf_idx - 1;
Jonathan Peyton067325f2016-05-31 19:01:15 +00003312 dispatch_shared_info_t *sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003313 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done);
3314 KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done);
3315 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
3316 __kmp_thread_free(th, (void*)sh_buf->doacross_flags);
3317 sh_buf->doacross_flags = NULL;
3318 sh_buf->doacross_num_done = 0;
Jonathan Peyton067325f2016-05-31 19:01:15 +00003319 sh_buf->doacross_buf_idx += __kmp_dispatch_num_buffers; // free buffer for future re-use
Jonathan Peyton71909c52016-03-02 22:42:06 +00003320 }
3321 // free private resources (need to keep buffer index forever)
3322 __kmp_thread_free(th, (void*)pr_buf->th_doacross_info);
3323 pr_buf->th_doacross_info = NULL;
3324 KA_TRACE(20,("__kmpc_doacross_fini() exit: T#%d\n", gtid));
3325}
3326#endif
3327
Jim Cownie5e8470a2013-09-27 10:38:44 +00003328// end of file //
3329