/*
 * kmp_csupport.c -- kfront linkage support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "omp.h"        /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_error.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512
29
30/* ------------------------------------------------------------------------ */
31/* ------------------------------------------------------------------------ */
32
33/* flags will be used in future, e.g., to implement */
34/* openmp_strict library restrictions */
35
36/*!
37 * @ingroup STARTUP_SHUTDOWN
38 * @param loc in source location information
39 * @param flags in for future use (currently ignored)
40 *
41 * Initialize the runtime library. This call is optional; if it is not made then
Jim Cownie4cc4bb42014-10-07 16:25:50 +000042 * it will be implicitly called by attempts to use other library functions.
Jim Cownie5e8470a2013-09-27 10:38:44 +000043 *
44 */
45void
46__kmpc_begin(ident_t *loc, kmp_int32 flags)
47{
48 // By default __kmp_ignore_mppbeg() returns TRUE.
49 if (__kmp_ignore_mppbeg() == FALSE) {
50 __kmp_internal_begin();
51
52 KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
53 }
54}
55
56/*!
57 * @ingroup STARTUP_SHUTDOWN
58 * @param loc source location information
59 *
 * Shut down the runtime library. This call is also optional, and even if made it will not
 * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
62 */
63void
64__kmpc_end(ident_t *loc)
65{
66 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() call no-op.
67 // However, this can be overridden with KMP_IGNORE_MPPEND environment variable.
68 // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
69 // will unregister this root (it can cause library shut down).
70 if (__kmp_ignore_mppend() == FALSE) {
71 KC_TRACE( 10, ("__kmpc_end: called\n" ) );
72 KA_TRACE( 30, ("__kmpc_end\n" ));
73
74 __kmp_internal_end_thread( -1 );
75 }
76}
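
/*
 * Illustrative sketch (not part of the runtime): how a compiler or a hand-written
 * driver might bracket a program with these optional calls. "loc" is an assumed
 * compiler-generated ident_t; its initializer is elided because the layout is
 * defined in kmp.h, not here.
 *
 *     static ident_t loc = { ... };       // compiler-generated source location
 *
 *     int main(void) {
 *         __kmpc_begin(&loc, 0);          // optional; flags currently ignored
 *         // ... OpenMP constructs ...
 *         __kmpc_end(&loc);               // no-op unless KMP_IGNORE_MPPEND=0
 *         return 0;
 *     }
 */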
77
78/*!
79@ingroup THREAD_STATES
80@param loc Source location information.
81@return The global thread index of the active thread.
82
83This function can be called in any context.
84
If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero.)

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
95
96*/
97kmp_int32
98__kmpc_global_thread_num(ident_t *loc)
99{
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
103
104 return gtid;
105}
106
107/*!
108@ingroup THREAD_STATES
109@param loc Source location information.
110@return The number of threads under control of the OpenMP<sup>*</sup> runtime
111
112This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime. That is
not a number that can be determined by any OpenMP standard calls, since the library may be
called from more than one non-OpenMP thread, and this reflects the total over all such calls.
Similarly, because the runtime keeps underlying threads alive even when they are not active
(the cost of creating and destroying OS threads is high), this call counts all such threads
even when they are merely waiting for work.
119*/
120kmp_int32
121__kmpc_global_num_threads(ident_t *loc)
122{
123 KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
124
125 return TCR_4(__kmp_nth);
126}
127
128/*!
129@ingroup THREAD_STATES
130@param loc Source location information.
131@return The thread number of the calling thread in the innermost active parallel construct.
132
133*/
134kmp_int32
135__kmpc_bound_thread_num(ident_t *loc)
136{
137 KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
138 return __kmp_tid_from_gtid( __kmp_entry_gtid() );
139}
140
141/*!
142@ingroup THREAD_STATES
143@param loc Source location information.
144@return The number of threads in the innermost active parallel construct.
145*/
146kmp_int32
147__kmpc_bound_num_threads(ident_t *loc)
148{
149 KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
150
151 return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
152}
153
154/*!
155 * @ingroup DEPRECATED
156 * @param loc location description
157 *
158 * This function need not be called. It always returns TRUE.
159 */
160kmp_int32
161__kmpc_ok_to_fork(ident_t *loc)
162{
163#ifndef KMP_DEBUG
164
165 return TRUE;
166
167#else
168
169 const char *semi2;
170 const char *semi3;
171 int line_no;
172
173 if (__kmp_par_range == 0) {
174 return TRUE;
175 }
176 semi2 = loc->psource;
177 if (semi2 == NULL) {
178 return TRUE;
179 }
180 semi2 = strchr(semi2, ';');
181 if (semi2 == NULL) {
182 return TRUE;
183 }
184 semi2 = strchr(semi2 + 1, ';');
185 if (semi2 == NULL) {
186 return TRUE;
187 }
188 if (__kmp_par_range_filename[0]) {
189 const char *name = semi2 - 1;
190 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
191 name--;
192 }
193 if ((*name == '/') || (*name == ';')) {
194 name++;
195 }
196 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
197 return __kmp_par_range < 0;
198 }
199 }
200 semi3 = strchr(semi2 + 1, ';');
201 if (__kmp_par_range_routine[0]) {
202 if ((semi3 != NULL) && (semi3 > semi2)
203 && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
204 return __kmp_par_range < 0;
205 }
206 }
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000207 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000208 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
209 return __kmp_par_range > 0;
210 }
211 return __kmp_par_range < 0;
212 }
213 return TRUE;
214
215#endif /* KMP_DEBUG */
216
217}
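
/*
 * Worked example of the parsing above (illustrative only, assuming the
 * ";file;routine;line;..." layout of ident_t::psource that the code implies):
 * given psource == "unknown;src/foo.c;bar;42;0;;", semi2 ends up at the ';'
 * following "src/foo.c", the file-name filter compares against "foo.c" (the text
 * after the last '/'), the routine filter compares against "bar", and line_no is
 * parsed as 42, which is then tested against __kmp_par_range_lb/__kmp_par_range_ub.
 */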
218
219/*!
220@ingroup THREAD_STATES
221@param loc Source location information.
222@return 1 if this thread is executing inside an active parallel region, zero if not.
223*/
224kmp_int32
225__kmpc_in_parallel( ident_t *loc )
226{
227 return __kmp_entry_thread() -> th.th_root -> r.r_active;
228}
229
230/*!
231@ingroup PARALLEL
232@param loc source location information
233@param global_tid global thread number
234@param num_threads number of threads requested for this parallel construct
235
236Set the number of threads to be used by the next fork spawned by this thread.
237This call is only required if the parallel construct has a `num_threads` clause.
238*/
239void
240__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
241{
242 KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
243 global_tid, num_threads ) );
244
245 __kmp_push_num_threads( loc, global_tid, num_threads );
246}
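
/*
 * Illustrative lowering sketch (not emitted by this file): for
 *
 *     #pragma omp parallel num_threads(4)
 *     { body(); }
 *
 * a compiler typically emits something along these lines, with the push
 * immediately preceding the fork so the request applies to that fork only:
 *
 *     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *     __kmpc_push_num_threads(&loc, gtid, 4);
 *     __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);
 *
 * "loc" and "outlined_body" are assumed compiler-generated names.
 */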
247
248void
249__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
250{
251 KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
252
253 /* the num_threads are automatically popped */
254}
255
256
257#if OMP_40_ENABLED
258
259void
260__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
261{
262 KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
263 global_tid, proc_bind ) );
264
265 __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
266}
267
268#endif /* OMP_40_ENABLED */
269
270
271/*!
272@ingroup PARALLEL
273@param loc source location information
274@param argc total number of arguments in the ellipsis
275@param microtask pointer to callback routine consisting of outlined parallel construct
276@param ... pointers to shared variables that aren't global
277
278Do the actual fork and call the microtask in the relevant number of threads.
279*/
280void
281__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
282{
283 int gtid = __kmp_entry_gtid();
Jonathan Peyton45be4502015-08-11 21:36:41 +0000284
285#if (KMP_STATS_ENABLED)
286 int inParallel = __kmpc_in_parallel(loc);
287 if (inParallel)
288 {
289 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
290 }
291 else
292 {
293 KMP_STOP_EXPLICIT_TIMER(OMP_serial);
294 KMP_COUNT_BLOCK(OMP_PARALLEL);
295 }
296#endif
297
Jim Cownie5e8470a2013-09-27 10:38:44 +0000298 // maybe to save thr_state is enough here
299 {
300 va_list ap;
301 va_start( ap, microtask );
302
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000303#if OMPT_SUPPORT
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000304 int tid = __kmp_tid_from_gtid( gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000305 kmp_info_t *master_th = __kmp_threads[ gtid ];
306 kmp_team_t *parent_team = master_th->th.th_team;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000307 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000308 parent_team->t.t_implicit_task_taskdata[tid].
309 ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
310 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000311#endif
312
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000313#if INCLUDE_SSC_MARKS
314 SSC_MARK_FORKING();
315#endif
316 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000317 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000318#if OMPT_SUPPORT
319 VOLATILE_CAST(void *) microtask, // "unwrapped" task
320#endif
321 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000322 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
323/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000324#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000325 &ap
326#else
327 ap
328#endif
329 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000330#if INCLUDE_SSC_MARKS
331 SSC_MARK_JOINING();
332#endif
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +0000333 __kmp_join_call( loc, gtid
334#if OMPT_SUPPORT
335 , fork_context_intel
336#endif
337 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000338
339 va_end( ap );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000340
341#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000342 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000343 parent_team->t.t_implicit_task_taskdata[tid].
344 ompt_task_info.frame.reenter_runtime_frame = 0;
345 }
346#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000347 }
Jonathan Peyton45be4502015-08-11 21:36:41 +0000348#if (KMP_STATS_ENABLED)
349 if (!inParallel)
350 KMP_START_EXPLICIT_TIMER(OMP_serial);
351#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000352}
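
/*
 * Illustrative sketch of the calling convention (assumed, based on the kmpc_micro
 * prototype; the exact code a given compiler emits may differ). For
 *
 *     int x = 0;
 *     #pragma omp parallel shared(x)
 *     { use(&x); }
 *
 * the outlined microtask receives the global and bound thread ids by pointer,
 * followed by one pointer per shared variable passed through the ellipsis:
 *
 *     static void outlined_body(kmp_int32 *gtid, kmp_int32 *btid, int *x) {
 *         use(x);
 *     }
 *     ...
 *     int x = 0;
 *     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_body, &x);   // argc == 1
 *
 * Each thread in the new team invokes the microtask; the master resumes after
 * the join performed by __kmp_join_call above.
 */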
353
354#if OMP_40_ENABLED
355/*!
356@ingroup PARALLEL
357@param loc source location information
358@param global_tid global thread number
359@param num_teams number of teams requested for the teams construct
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000360@param num_threads number of threads per team requested for the teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +0000361
362Set the number of teams to be used by the teams construct.
363This call is only required if the teams construct has a `num_teams` clause
364or a `thread_limit` clause (or both).
365*/
366void
367__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
368{
369 KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
370 global_tid, num_teams, num_threads ) );
371
372 __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
373}
374
375/*!
376@ingroup PARALLEL
377@param loc source location information
378@param argc total number of arguments in the ellipsis
379@param microtask pointer to callback routine consisting of outlined teams construct
380@param ... pointers to shared variables that aren't global
381
382Do the actual fork and call the microtask in the relevant number of threads.
383*/
384void
385__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
386{
387 int gtid = __kmp_entry_gtid();
388 kmp_info_t *this_thr = __kmp_threads[ gtid ];
389 va_list ap;
390 va_start( ap, microtask );
391
Jonathan Peyton45be4502015-08-11 21:36:41 +0000392 KMP_COUNT_BLOCK(OMP_TEAMS);
393
Jim Cownie5e8470a2013-09-27 10:38:44 +0000394 // remember teams entry point and nesting level
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000395 this_thr->th.th_teams_microtask = microtask;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000396 this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
397
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000398#if OMPT_SUPPORT
399 kmp_team_t *parent_team = this_thr->th.th_team;
400 int tid = __kmp_tid_from_gtid( gtid );
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000401 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000402 parent_team->t.t_implicit_task_taskdata[tid].
403 ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
404 }
405#endif
406
Jim Cownie5e8470a2013-09-27 10:38:44 +0000407 // check if __kmpc_push_num_teams called, set default number of teams otherwise
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000408 if ( this_thr->th.th_teams_size.nteams == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000409 __kmp_push_num_teams( loc, gtid, 0, 0 );
410 }
411 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000412 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
413 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000414
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000415 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000416 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000417#if OMPT_SUPPORT
418 VOLATILE_CAST(void *) microtask, // "unwrapped" task
419#endif
420 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000421 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000422#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000423 &ap
424#else
425 ap
426#endif
427 );
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +0000428 __kmp_join_call( loc, gtid
429#if OMPT_SUPPORT
430 , fork_context_intel
431#endif
432 );
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000433
434#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000435 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000436 parent_team->t.t_implicit_task_taskdata[tid].
437 ompt_task_info.frame.reenter_runtime_frame = NULL;
438 }
439#endif
440
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000441 this_thr->th.th_teams_microtask = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000442 this_thr->th.th_teams_level = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000443 *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000444 va_end( ap );
445}
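
/*
 * Illustrative lowering sketch for a teams construct (assumed names):
 *
 *     #pragma omp teams num_teams(4) thread_limit(8)
 *     { team_body(); }
 *
 * would typically become
 *
 *     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *     __kmpc_push_num_teams(&loc, gtid, 4, 8);     // only needed for the clauses
 *     __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_body);
 *
 * Without the push, __kmpc_fork_teams falls back to __kmp_push_num_teams(loc, gtid, 0, 0)
 * as seen above, which selects default team sizes.
 */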
446#endif /* OMP_40_ENABLED */
447
448
449//
450// I don't think this function should ever have been exported.
451// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
452// openmp code ever called it, but it's been exported from the RTL for so
453// long that I'm afraid to remove the definition.
454//
455int
456__kmpc_invoke_task_func( int gtid )
457{
458 return __kmp_invoke_task_func( gtid );
459}
460
461/*!
462@ingroup PARALLEL
463@param loc source location information
464@param global_tid global thread number
465
466Enter a serialized parallel construct. This interface is used to handle a
467conditional parallel region, like this,
468@code
469#pragma omp parallel if (condition)
470@endcode
471when the condition is false.
472*/
473void
474__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
475{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000476 __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
477 * kmp_fork_call since the tasks to be done are similar in each case.
478 */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000479}
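
/*
 * Illustrative sketch (assumed compiler-generated shape) of the conditional
 * parallel region described above:
 *
 *     #pragma omp parallel if (condition)
 *     { body(); }
 *
 *     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *     if (condition) {
 *         __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);
 *     } else {
 *         __kmpc_serialized_parallel(&loc, gtid);
 *         kmp_int32 zero = 0;
 *         outlined_body(&gtid, &zero);            // run the region on this thread
 *         __kmpc_end_serialized_parallel(&loc, gtid);
 *     }
 */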
480
481/*!
482@ingroup PARALLEL
483@param loc source location information
484@param global_tid global thread number
485
486Leave a serialized parallel construct.
487*/
488void
489__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
490{
491 kmp_internal_control_t *top;
492 kmp_info_t *this_thr;
493 kmp_team_t *serial_team;
494
495 KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
496
497 /* skip all this code for autopar serialized loops since it results in
498 unacceptable overhead */
499 if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
500 return;
501
502 // Not autopar code
503 if( ! TCR_4( __kmp_init_parallel ) )
504 __kmp_parallel_initialize();
505
506 this_thr = __kmp_threads[ global_tid ];
507 serial_team = this_thr->th.th_serial_team;
508
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000509 #if OMP_41_ENABLED
510 kmp_task_team_t * task_team = this_thr->th.th_task_team;
511
512 // we need to wait for the proxy tasks before finishing the thread
513 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
514 __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here?
515 #endif
516
Jim Cownie5e8470a2013-09-27 10:38:44 +0000517 KMP_MB();
518 KMP_DEBUG_ASSERT( serial_team );
519 KMP_ASSERT( serial_team -> t.t_serialized );
520 KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
521 KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
522 KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
523 KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
524
525 /* If necessary, pop the internal control stack values and replace the team values */
526 top = serial_team -> t.t_control_stack_top;
527 if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000528 copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000529 serial_team -> t.t_control_stack_top = top -> next;
530 __kmp_free(top);
531 }
532
Jim Cownie5e8470a2013-09-27 10:38:44 +0000533 //if( serial_team -> t.t_serialized > 1 )
534 serial_team -> t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000535
536 /* pop dispatch buffers stack */
537 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
538 {
539 dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
540 serial_team->t.t_dispatch->th_disp_buffer =
541 serial_team->t.t_dispatch->th_disp_buffer->next;
542 __kmp_free( disp_buffer );
543 }
544
545 -- serial_team -> t.t_serialized;
546 if ( serial_team -> t.t_serialized == 0 ) {
547
548 /* return to the parallel section */
549
550#if KMP_ARCH_X86 || KMP_ARCH_X86_64
551 if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
552 __kmp_clear_x87_fpu_status_word();
553 __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
554 __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
555 }
556#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
557
558 this_thr -> th.th_team = serial_team -> t.t_parent;
559 this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
560
561 /* restore values cached in the thread */
562 this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
563 this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
564 this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
565
566 /* TODO the below shouldn't need to be adjusted for serialized teams */
567 this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
568 t.t_dispatch[ serial_team -> t.t_master_tid ];
569
Jim Cownie5e8470a2013-09-27 10:38:44 +0000570 __kmp_pop_current_task_from_thread( this_thr );
571
572 KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
573 this_thr -> th.th_current_task -> td_flags.executing = 1;
574
575 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +0000576 // Copy the task team from the new child / old parent team to the thread.
577 this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000578 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
579 global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
580 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000581 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000582 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
583 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
584 global_tid, serial_team, serial_team -> t.t_serialized ) );
585 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000586 }
587
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000588#if USE_ITT_BUILD
589 kmp_uint64 cur_time = 0;
590#if USE_ITT_NOTIFY
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000591 if ( __itt_get_timestamp_ptr ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000592 cur_time = __itt_get_timestamp();
593 }
594#endif /* USE_ITT_NOTIFY */
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000595 if ( this_thr->th.th_team->t.t_level == 0
596#if OMP_40_ENABLED
597 && this_thr->th.th_teams_microtask == NULL
598#endif
599 ) {
600 // Report the barrier
Jim Cownie181b4bb2013-12-23 17:28:57 +0000601 this_thr->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000602 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
603 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
604 {
605 __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
606 cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
607 if ( __kmp_forkjoin_frames_mode == 3 )
608 // Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
609 __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
610 cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
611 } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
612 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
613 // Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
614 __kmp_itt_region_joined( global_tid, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000615 }
616#endif /* USE_ITT_BUILD */
617
618 if ( __kmp_env_consistency_check )
619 __kmp_pop_parallel( global_tid, NULL );
620}
621
622/*!
623@ingroup SYNCHRONIZATION
624@param loc source location information.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000625
Andrey Churbanov723a6b62015-02-20 18:09:27 +0000626Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
Jim Cownie5e8470a2013-09-27 10:38:44 +0000627depending on the memory ordering convention obeyed by the compiler
628even that may not be necessary).
629*/
630void
Andrey Churbanov723a6b62015-02-20 18:09:27 +0000631__kmpc_flush(ident_t *loc)
Jim Cownie5e8470a2013-09-27 10:38:44 +0000632{
633 KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
634
635 /* need explicit __mf() here since use volatile instead in library */
636 KMP_MB(); /* Flush all pending memory write invalidates. */
637
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000638 #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
639 #if KMP_MIC
640 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
641 // We shouldn't need it, though, since the ABI rules require that
642 // * If the compiler generates NGO stores it also generates the fence
643 // * If users hand-code NGO stores they should insert the fence
644 // therefore no incomplete unordered stores should be visible.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000645 #else
        // C74404
        // This is to address non-temporal store instructions (sfence needed).
        // The clflush instruction is also addressed here (mfence needed).
        // Probably the non-temporal load movntdqa instruction should also be addressed.
        // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2-capable.
651 if ( ! __kmp_cpuinfo.initialized ) {
652 __kmp_query_cpuid( & __kmp_cpuinfo );
653 }; // if
654 if ( ! __kmp_cpuinfo.sse2 ) {
655 // CPU cannot execute SSE2 instructions.
656 } else {
657 #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
658 _mm_mfence();
659 #else
660 __sync_synchronize();
661 #endif // KMP_COMPILER_ICC
662 }; // if
663 #endif // KMP_MIC
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000664 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
665 // Nothing to see here move along
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000666 #elif KMP_ARCH_PPC64
667 // Nothing needed here (we have a real MB above).
668 #if KMP_OS_CNK
669 // The flushing thread needs to yield here; this prevents a
670 // busy-waiting thread from saturating the pipeline. flush is
671 // often used in loops like this:
672 // while (!flag) {
673 // #pragma omp flush(flag)
674 // }
675 // and adding the yield here is good for at least a 10x speedup
676 // when running >2 threads per core (on the NAS LU benchmark).
677 __kmp_yield(TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000678 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000679 #else
680 #error Unknown or unsupported architecture
681 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000682
683}
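
/*
 * Illustrative sketch: a flush directive lowers to a single call, regardless of
 * the (optional) flush-set, e.g. the spin-wait pattern from the comment above:
 *
 *     while (!flag) {
 *         __kmpc_flush(&loc);      // #pragma omp flush(flag)
 *     }
 *
 * "loc" is an assumed compiler-generated ident_t describing the directive.
 */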
684
685/* -------------------------------------------------------------------------- */
686
687/* -------------------------------------------------------------------------- */
688
689/*!
690@ingroup SYNCHRONIZATION
691@param loc source location information
692@param global_tid thread id.
693
694Execute a barrier.
695*/
696void
697__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
698{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000699 KMP_COUNT_BLOCK(OMP_BARRIER);
700 KMP_TIME_BLOCK(OMP_barrier);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000701 KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
702
703 if (! TCR_4(__kmp_init_parallel))
704 __kmp_parallel_initialize();
705
706 if ( __kmp_env_consistency_check ) {
707 if ( loc == 0 ) {
708 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
709 }; // if
710
711 __kmp_check_barrier( global_tid, ct_barrier, loc );
712 }
713
714 __kmp_threads[ global_tid ]->th.th_ident = loc;
715 // TODO: explicit barrier_wait_id:
716 // this function is called when 'barrier' directive is present or
717 // implicit barrier at the end of a worksharing construct.
718 // 1) better to add a per-thread barrier counter to a thread data structure
719 // 2) set to 0 when a new team is created
720 // 4) no sync is required
721
722 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
723}
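
/*
 * Illustrative sketch (assumed names): an explicit barrier
 *
 *     #pragma omp barrier
 *
 * lowers to
 *
 *     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *     __kmpc_barrier(&loc, gtid);
 *
 * The same call is also emitted for the implicit barrier at the end of a
 * worksharing construct that does not carry a nowait clause.
 */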
724
725/* The BARRIER for a MASTER section is always explicit */
726/*!
727@ingroup WORK_SHARING
728@param loc source location information.
729@param global_tid global thread number .
730@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
731*/
732kmp_int32
733__kmpc_master(ident_t *loc, kmp_int32 global_tid)
734{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000735 KMP_COUNT_BLOCK(OMP_MASTER);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000736 int status = 0;
737
738 KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
739
740 if( ! TCR_4( __kmp_init_parallel ) )
741 __kmp_parallel_initialize();
742
Jonathan Peyton45be4502015-08-11 21:36:41 +0000743 if( KMP_MASTER_GTID( global_tid )) {
744 KMP_START_EXPLICIT_TIMER(OMP_master);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000745 status = 1;
Jonathan Peyton45be4502015-08-11 21:36:41 +0000746 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000747
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000748#if OMPT_SUPPORT && OMPT_TRACE
749 if (status) {
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000750 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000751 ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
Jonathan Peyton122dd762015-07-13 18:55:45 +0000752 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
753 kmp_team_t *team = this_thr -> th.th_team;
754
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000755 int tid = __kmp_tid_from_gtid( global_tid );
756 ompt_callbacks.ompt_callback(ompt_event_master_begin)(
757 team->t.ompt_team_info.parallel_id,
758 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
759 }
760 }
761#endif
762
Jim Cownie5e8470a2013-09-27 10:38:44 +0000763 if ( __kmp_env_consistency_check ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000764#if KMP_USE_DYNAMIC_LOCK
765 if (status)
766 __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
767 else
768 __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
769#else
Jim Cownie5e8470a2013-09-27 10:38:44 +0000770 if (status)
771 __kmp_push_sync( global_tid, ct_master, loc, NULL );
772 else
773 __kmp_check_sync( global_tid, ct_master, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000774#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000775 }
776
777 return status;
778}
779
780/*!
781@ingroup WORK_SHARING
782@param loc source location information.
783@param global_tid global thread number .
784
785Mark the end of a <tt>master</tt> region. This should only be called by the thread
786that executes the <tt>master</tt> region.
787*/
788void
789__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
790{
791 KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
792
793 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
Jonathan Peyton45be4502015-08-11 21:36:41 +0000794 KMP_STOP_EXPLICIT_TIMER(OMP_master);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000795
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000796#if OMPT_SUPPORT && OMPT_TRACE
797 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
798 kmp_team_t *team = this_thr -> th.th_team;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000799 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000800 ompt_callbacks.ompt_callback(ompt_event_master_end)) {
801 int tid = __kmp_tid_from_gtid( global_tid );
802 ompt_callbacks.ompt_callback(ompt_event_master_end)(
803 team->t.ompt_team_info.parallel_id,
804 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
805 }
806#endif
807
Jim Cownie5e8470a2013-09-27 10:38:44 +0000808 if ( __kmp_env_consistency_check ) {
809 if( global_tid < 0 )
810 KMP_WARNING( ThreadIdentInvalid );
811
812 if( KMP_MASTER_GTID( global_tid ))
813 __kmp_pop_sync( global_tid, ct_master, loc );
814 }
815}
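
/*
 * Illustrative sketch (assumed names) of a master construct:
 *
 *     #pragma omp master
 *     { only_master(); }
 *
 *     if (__kmpc_master(&loc, gtid)) {
 *         only_master();
 *         __kmpc_end_master(&loc, gtid);
 *     }
 *
 * Only the thread for which __kmpc_master returns 1 executes the block and the
 * matching end call; there is no implied barrier.
 */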
816
817/*!
818@ingroup WORK_SHARING
819@param loc source location information.
820@param gtid global thread number.
821
822Start execution of an <tt>ordered</tt> construct.
823*/
824void
825__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
826{
827 int cid = 0;
828 kmp_info_t *th;
829 KMP_DEBUG_ASSERT( __kmp_init_serial );
830
831 KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
832
833 if (! TCR_4(__kmp_init_parallel))
834 __kmp_parallel_initialize();
835
836#if USE_ITT_BUILD
837 __kmp_itt_ordered_prep( gtid );
838 // TODO: ordered_wait_id
839#endif /* USE_ITT_BUILD */
840
841 th = __kmp_threads[ gtid ];
842
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000843#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000844 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000845 /* OMPT state update */
846 th->th.ompt_thread_info.wait_id = (uint64_t) loc;
847 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
848
849 /* OMPT event callback */
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000850 if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000851 ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
852 th->th.ompt_thread_info.wait_id);
853 }
854 }
855#endif
856
Jim Cownie5e8470a2013-09-27 10:38:44 +0000857 if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
858 (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
859 else
860 __kmp_parallel_deo( & gtid, & cid, loc );
861
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000862#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000863 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000864 /* OMPT state update */
865 th->th.ompt_thread_info.state = ompt_state_work_parallel;
866 th->th.ompt_thread_info.wait_id = 0;
867
868 /* OMPT event callback */
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000869 if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000870 ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
871 th->th.ompt_thread_info.wait_id);
872 }
873 }
874#endif
875
Jim Cownie5e8470a2013-09-27 10:38:44 +0000876#if USE_ITT_BUILD
877 __kmp_itt_ordered_start( gtid );
878#endif /* USE_ITT_BUILD */
879}
880
881/*!
882@ingroup WORK_SHARING
883@param loc source location information.
884@param gtid global thread number.
885
886End execution of an <tt>ordered</tt> construct.
887*/
888void
889__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
890{
891 int cid = 0;
892 kmp_info_t *th;
893
894 KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
895
896#if USE_ITT_BUILD
897 __kmp_itt_ordered_end( gtid );
898 // TODO: ordered_wait_id
899#endif /* USE_ITT_BUILD */
900
901 th = __kmp_threads[ gtid ];
902
903 if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
904 (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
905 else
906 __kmp_parallel_dxo( & gtid, & cid, loc );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000907
908#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000909 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000910 ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
911 ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
912 th->th.ompt_thread_info.wait_id);
913 }
914#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000915}
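
/*
 * Illustrative sketch (assumed names): inside a loop with an ordered clause,
 * each iteration's ordered region is bracketed by these calls, e.g.
 *
 *     // body of one iteration of "#pragma omp for ordered"
 *     compute_part();
 *     __kmpc_ordered(&loc, gtid);      // blocks until this iteration's turn
 *     ordered_part();
 *     __kmpc_end_ordered(&loc, gtid);  // releases the next iteration in sequence
 *
 * The surrounding loop-scheduling calls are defined elsewhere in the runtime.
 */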
916
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000917#if KMP_USE_DYNAMIC_LOCK
918
919static __forceinline kmp_indirect_lock_t *
920__kmp_get_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_dyna_lockseq_t seq)
921{
922 // Code from __kmp_get_critical_section_ptr
923 // This function returns an indirect lock object instead of a user lock.
924 kmp_indirect_lock_t **lck, *ret;
925 lck = (kmp_indirect_lock_t **)crit;
926 ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
927 if (ret == NULL) {
928 void *idx;
929 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
930 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
931 ret = ilk;
932 DYNA_I_LOCK_FUNC(ilk, init)(ilk->lock);
933 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
934 DYNA_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
935 KA_TRACE(20, ("__kmp_get_indirect_csptr: initialized indirect lock #%d\n", tag));
936#if USE_ITT_BUILD
937 __kmp_itt_critical_creating(ilk->lock, loc);
938#endif
939 int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
940 if (status == 0) {
941#if USE_ITT_BUILD
942 __kmp_itt_critical_destroyed(ilk->lock);
943#endif
944 // Postponing destroy, to avoid costly dispatch here.
945 //DYNA_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
946 ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
947 KMP_DEBUG_ASSERT(ret != NULL);
948 }
949 }
950 return ret;
951}
952
953// Fast-path acquire tas lock
954#define DYNA_ACQUIRE_TAS_LOCK(lock, gtid) { \
955 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
956 if (l->lk.poll != DYNA_LOCK_FREE(tas) || \
957 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
958 kmp_uint32 spins; \
959 KMP_FSYNC_PREPARE(l); \
960 KMP_INIT_YIELD(spins); \
961 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
962 KMP_YIELD(TRUE); \
963 } else { \
964 KMP_YIELD_SPIN(spins); \
965 } \
966 while (l->lk.poll != DYNA_LOCK_FREE(tas) || \
967 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
968 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
969 KMP_YIELD(TRUE); \
970 } else { \
971 KMP_YIELD_SPIN(spins); \
972 } \
973 } \
974 } \
975 KMP_FSYNC_ACQUIRED(l); \
976}
977
978// Fast-path test tas lock
979#define DYNA_TEST_TAS_LOCK(lock, gtid, rc) { \
980 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
981 rc = l->lk.poll == DYNA_LOCK_FREE(tas) && \
982 KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas)); \
983}
984
985// Fast-path release tas lock
986#define DYNA_RELEASE_TAS_LOCK(lock, gtid) { \
987 TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, DYNA_LOCK_FREE(tas)); \
988 KMP_MB(); \
989}
990
991#if DYNA_HAS_FUTEX
992
993# include <unistd.h>
994# include <sys/syscall.h>
995# ifndef FUTEX_WAIT
996# define FUTEX_WAIT 0
997# endif
998# ifndef FUTEX_WAKE
999# define FUTEX_WAKE 1
1000# endif
1001
1002// Fast-path acquire futex lock
1003#define DYNA_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
1004 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1005 kmp_int32 gtid_code = (gtid+1) << 1; \
1006 KMP_MB(); \
1007 KMP_FSYNC_PREPARE(ftx); \
1008 kmp_int32 poll_val; \
1009 while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), \
1010 DYNA_LOCK_BUSY(gtid_code, futex))) != DYNA_LOCK_FREE(futex)) { \
1011 kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; \
1012 if (!cond) { \
1013 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex))) { \
1014 continue; \
1015 } \
1016 poll_val |= DYNA_LOCK_BUSY(1, futex); \
1017 } \
1018 kmp_int32 rc; \
1019 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
1020 continue; \
1021 } \
1022 gtid_code |= 1; \
1023 } \
1024 KMP_FSYNC_ACQUIRED(ftx); \
1025}
1026
1027// Fast-path test futex lock
1028#define DYNA_TEST_FUTEX_LOCK(lock, gtid, rc) { \
1029 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1030 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1)) { \
1031 KMP_FSYNC_ACQUIRED(ftx); \
1032 rc = TRUE; \
1033 } else { \
1034 rc = FALSE; \
1035 } \
1036}
1037
1038// Fast-path release futex lock
1039#define DYNA_RELEASE_FUTEX_LOCK(lock, gtid) { \
1040 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1041 KMP_MB(); \
1042 KMP_FSYNC_RELEASING(ftx); \
1043 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex)); \
1044 if (DYNA_LOCK_STRIP(poll_val) & 1) { \
1045 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1046 } \
1047 KMP_MB(); \
1048 KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1049}
1050
1051#endif // DYNA_HAS_FUTEX
1052
1053#else // KMP_USE_DYNAMIC_LOCK
1054
Jim Cownie5e8470a2013-09-27 10:38:44 +00001055static kmp_user_lock_p
1056__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
1057{
1058 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1059
1060 //
1061 // Because of the double-check, the following load
1062 // doesn't need to be volatile.
1063 //
1064 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1065
1066 if ( lck == NULL ) {
1067 void * idx;
1068
1069 // Allocate & initialize the lock.
1070 // Remember allocated locks in table in order to free them in __kmp_cleanup()
1071 lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
1072 __kmp_init_user_lock_with_checks( lck );
1073 __kmp_set_user_lock_location( lck, loc );
1074#if USE_ITT_BUILD
1075 __kmp_itt_critical_creating( lck );
1076 // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
1077 // lock. It is the only place where we can guarantee it. There are chances the lock will
1078 // destroyed with no usage, but it is not a problem, because this is not real event seen
1079 // by user but rather setting name for object (lock). See more details in kmp_itt.h.
1080#endif /* USE_ITT_BUILD */
1081
1082 //
1083 // Use a cmpxchg instruction to slam the start of the critical
1084 // section with the lock pointer. If another thread beat us
1085 // to it, deallocate the lock, and use the lock that the other
1086 // thread allocated.
1087 //
1088 int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
1089
1090 if ( status == 0 ) {
1091 // Deallocate the lock and reload the value.
1092#if USE_ITT_BUILD
1093 __kmp_itt_critical_destroyed( lck );
1094 // Let ITT know the lock is destroyed and the same memory location may be reused for
1095 // another purpose.
1096#endif /* USE_ITT_BUILD */
1097 __kmp_destroy_user_lock_with_checks( lck );
1098 __kmp_user_lock_free( &idx, gtid, lck );
1099 lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1100 KMP_DEBUG_ASSERT( lck != NULL );
1101 }
1102 }
1103 return lck;
1104}
1105
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001106#endif // KMP_USE_DYNAMIC_LOCK
1107
Jim Cownie5e8470a2013-09-27 10:38:44 +00001108/*!
1109@ingroup WORK_SHARING
1110@param loc source location information.
1111@param global_tid global thread number .
1112@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
1113some other suitably unique value.
1114
1115Enter code protected by a `critical` construct.
1116This function blocks until the executing thread can enter the critical section.
1117*/
1118void
1119__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001120 KMP_COUNT_BLOCK(OMP_CRITICAL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001121
1122 kmp_user_lock_p lck;
1123
1124 KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1125
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001126#if KMP_USE_DYNAMIC_LOCK
1127 // Assumption: all direct locks fit in OMP_CRITICAL_SIZE.
1128 // The global sequence __kmp_user_lock_seq is used unless compiler pushes a value.
1129 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1130 lck = (kmp_user_lock_p)crit;
1131 // The thread that reaches here first needs to tag the lock word.
1132 if (*((kmp_dyna_lock_t *)lck) == 0) {
1133 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
1134 }
1135 if (__kmp_env_consistency_check) {
1136 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
1137 }
1138# if USE_ITT_BUILD
1139 __kmp_itt_critical_acquiring(lck);
1140# endif
1141# if DYNA_USE_FAST_TAS
1142 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1143 DYNA_ACQUIRE_TAS_LOCK(lck, global_tid);
1144 } else
1145# elif DYNA_USE_FAST_FUTEX
1146 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1147 DYNA_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1148 } else
1149# endif
1150 {
1151 DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
1152 }
1153 } else {
1154 kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
1155 lck = ilk->lock;
1156 if (__kmp_env_consistency_check) {
1157 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
1158 }
1159# if USE_ITT_BUILD
1160 __kmp_itt_critical_acquiring(lck);
1161# endif
1162 DYNA_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1163 }
1164
1165#else // KMP_USE_DYNAMIC_LOCK
1166
Jim Cownie5e8470a2013-09-27 10:38:44 +00001167 //TODO: add THR_OVHD_STATE
1168
1169 KMP_CHECK_USER_LOCK_INIT();
1170
1171 if ( ( __kmp_user_lock_kind == lk_tas )
1172 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1173 lck = (kmp_user_lock_p)crit;
1174 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001175#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001176 else if ( ( __kmp_user_lock_kind == lk_futex )
1177 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1178 lck = (kmp_user_lock_p)crit;
1179 }
1180#endif
1181 else { // ticket, queuing or drdpa
1182 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1183 }
1184
1185 if ( __kmp_env_consistency_check )
1186 __kmp_push_sync( global_tid, ct_critical, loc, lck );
1187
1188 /* since the critical directive binds to all threads, not just
1189 * the current team we have to check this even if we are in a
1190 * serialized team */
1191 /* also, even if we are the uber thread, we still have to conduct the lock,
1192 * as we have to contend with sibling threads */
1193
1194#if USE_ITT_BUILD
1195 __kmp_itt_critical_acquiring( lck );
1196#endif /* USE_ITT_BUILD */
1197 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001198 __kmp_acquire_user_lock_with_checks( lck, global_tid );
1199
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001200#endif // KMP_USE_DYNAMIC_LOCK
1201
Jim Cownie5e8470a2013-09-27 10:38:44 +00001202#if USE_ITT_BUILD
1203 __kmp_itt_critical_acquired( lck );
1204#endif /* USE_ITT_BUILD */
1205
1206 KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
1207} // __kmpc_critical
1208
1209/*!
1210@ingroup WORK_SHARING
1211@param loc source location information.
1212@param global_tid global thread number .
1213@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
1214some other suitably unique value.
1215
1216Leave a critical section, releasing any lock that was held during its execution.
1217*/
1218void
1219__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
1220{
1221 kmp_user_lock_p lck;
1222
1223 KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
1224
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001225#if KMP_USE_DYNAMIC_LOCK
1226 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1227 lck = (kmp_user_lock_p)crit;
1228 KMP_ASSERT(lck != NULL);
1229 if (__kmp_env_consistency_check) {
1230 __kmp_pop_sync(global_tid, ct_critical, loc);
1231 }
1232# if USE_ITT_BUILD
1233 __kmp_itt_critical_releasing( lck );
1234# endif
1235# if DYNA_USE_FAST_TAS
1236 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1237 DYNA_RELEASE_TAS_LOCK(lck, global_tid);
1238 } else
1239# elif DYNA_USE_FAST_FUTEX
1240 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1241 DYNA_RELEASE_FUTEX_LOCK(lck, global_tid);
1242 } else
1243# endif
1244 {
1245 DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1246 }
1247 } else {
1248 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1249 KMP_ASSERT(ilk != NULL);
1250 lck = ilk->lock;
1251 if (__kmp_env_consistency_check) {
1252 __kmp_pop_sync(global_tid, ct_critical, loc);
1253 }
1254# if USE_ITT_BUILD
1255 __kmp_itt_critical_releasing( lck );
1256# endif
1257 DYNA_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1258 }
1259
1260#else // KMP_USE_DYNAMIC_LOCK
1261
Jim Cownie5e8470a2013-09-27 10:38:44 +00001262 if ( ( __kmp_user_lock_kind == lk_tas )
1263 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1264 lck = (kmp_user_lock_p)crit;
1265 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001266#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001267 else if ( ( __kmp_user_lock_kind == lk_futex )
1268 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1269 lck = (kmp_user_lock_p)crit;
1270 }
1271#endif
1272 else { // ticket, queuing or drdpa
1273 lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
1274 }
1275
1276 KMP_ASSERT(lck != NULL);
1277
1278 if ( __kmp_env_consistency_check )
1279 __kmp_pop_sync( global_tid, ct_critical, loc );
1280
1281#if USE_ITT_BUILD
1282 __kmp_itt_critical_releasing( lck );
1283#endif /* USE_ITT_BUILD */
1284 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001285 __kmp_release_user_lock_with_checks( lck, global_tid );
1286
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001287#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001288 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001289 ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
1290 ompt_callbacks.ompt_callback(ompt_event_release_critical)(
1291 (uint64_t) lck);
1292 }
1293#endif
1294
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001295#endif // KMP_USE_DYNAMIC_LOCK
1296
Jim Cownie5e8470a2013-09-27 10:38:44 +00001297 KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
1298}
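
/*
 * Illustrative sketch (assumed names) of a named critical section:
 *
 *     #pragma omp critical(update)
 *     { shared_counter++; }
 *
 *     // One zero-initialized kmp_critical_name per critical name (a shared
 *     // global in real compiler output; shown as a static here for the sketch).
 *     static kmp_critical_name crit_update;
 *     ...
 *     __kmpc_critical(&loc, gtid, &crit_update);
 *     shared_counter++;
 *     __kmpc_end_critical(&loc, gtid, &crit_update);
 *
 * The first thread to reach the section initializes the underlying lock, as
 * implemented above.
 */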
1299
1300/*!
1301@ingroup SYNCHRONIZATION
1302@param loc source location information
1303@param global_tid thread id.
1304@return one if the thread should execute the master block, zero otherwise
1305
1306Start execution of a combined barrier and master. The barrier is executed inside this function.
1307*/
1308kmp_int32
1309__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1310{
1311 int status;
1312
1313 KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1314
1315 if (! TCR_4(__kmp_init_parallel))
1316 __kmp_parallel_initialize();
1317
1318 if ( __kmp_env_consistency_check )
1319 __kmp_check_barrier( global_tid, ct_barrier, loc );
1320
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001321#if USE_ITT_NOTIFY
1322 __kmp_threads[global_tid]->th.th_ident = loc;
1323#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001324 status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1325
1326 return (status != 0) ? 0 : 1;
1327}
1328
1329/*!
1330@ingroup SYNCHRONIZATION
1331@param loc source location information
1332@param global_tid thread id.
1333
1334Complete the execution of a combined barrier and master. This function should
1335only be called at the completion of the <tt>master</tt> code. Other threads will
1336still be waiting at the barrier and this call releases them.
1337*/
1338void
1339__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1340{
1341 KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1342
1343 __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1344}
1345
1346/*!
1347@ingroup SYNCHRONIZATION
1348@param loc source location information
1349@param global_tid thread id.
1350@return one if the thread should execute the master block, zero otherwise
1351
1352Start execution of a combined barrier and master(nowait) construct.
1353The barrier is executed inside this function.
There is no equivalent "end" function, since the compiler emits no matching end call for the
nowait form; the bookkeeping __kmpc_end_master would otherwise do is handled inside this function.
*/
1356kmp_int32
1357__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1358{
1359 kmp_int32 ret;
1360
1361 KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1362
1363 if (! TCR_4(__kmp_init_parallel))
1364 __kmp_parallel_initialize();
1365
1366 if ( __kmp_env_consistency_check ) {
1367 if ( loc == 0 ) {
1368 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1369 }
1370 __kmp_check_barrier( global_tid, ct_barrier, loc );
1371 }
1372
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001373#if USE_ITT_NOTIFY
1374 __kmp_threads[global_tid]->th.th_ident = loc;
1375#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001376 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1377
1378 ret = __kmpc_master (loc, global_tid);
1379
1380 if ( __kmp_env_consistency_check ) {
1381 /* there's no __kmpc_end_master called; so the (stats) */
1382 /* actions of __kmpc_end_master are done here */
1383
1384 if ( global_tid < 0 ) {
1385 KMP_WARNING( ThreadIdentInvalid );
1386 }
1387 if (ret) {
1388 /* only one thread should do the pop since only */
1389 /* one did the push (see __kmpc_master()) */
1390
1391 __kmp_pop_sync( global_tid, ct_master, loc );
1392 }
1393 }
1394
1395 return (ret);
1396}
1397
1398/* The BARRIER for a SINGLE process section is always explicit */
1399/*!
1400@ingroup WORK_SHARING
1401@param loc source location information
1402@param global_tid global thread number
1403@return One if this thread should execute the single construct, zero otherwise.
1404
1405Test whether to execute a <tt>single</tt> construct.
1406There are no implicit barriers in the two "single" calls, rather the compiler should
1407introduce an explicit barrier if it is required.
1408*/
1409
1410kmp_int32
1411__kmpc_single(ident_t *loc, kmp_int32 global_tid)
1412{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001413 KMP_COUNT_BLOCK(OMP_SINGLE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001414 kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
Jonathan Peyton45be4502015-08-11 21:36:41 +00001415 if(rc == TRUE) {
1416 KMP_START_EXPLICIT_TIMER(OMP_single);
1417 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001418
1419#if OMPT_SUPPORT && OMPT_TRACE
1420 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1421 kmp_team_t *team = this_thr -> th.th_team;
1422 int tid = __kmp_tid_from_gtid( global_tid );
1423
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001424 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001425 if (rc) {
1426 if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
1427 ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
1428 team->t.ompt_team_info.parallel_id,
1429 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
1430 team->t.ompt_team_info.microtask);
1431 }
1432 } else {
1433 if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
1434 ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
1435 team->t.ompt_team_info.parallel_id,
1436 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1437 }
1438 this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
1439 }
1440 }
1441#endif
1442
Jim Cownie5e8470a2013-09-27 10:38:44 +00001443 return rc;
1444}
1445
1446/*!
1447@ingroup WORK_SHARING
1448@param loc source location information
1449@param global_tid global thread number
1450
1451Mark the end of a <tt>single</tt> construct. This function should
1452only be called by the thread that executed the block of code protected
1453by the `single` construct.
1454*/
1455void
1456__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1457{
1458 __kmp_exit_single( global_tid );
Jonathan Peyton45be4502015-08-11 21:36:41 +00001459 KMP_STOP_EXPLICIT_TIMER(OMP_single);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001460
1461#if OMPT_SUPPORT && OMPT_TRACE
1462 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1463 kmp_team_t *team = this_thr -> th.th_team;
1464 int tid = __kmp_tid_from_gtid( global_tid );
1465
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001466 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001467 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
1468 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
1469 team->t.ompt_team_info.parallel_id,
1470 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1471 }
1472#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001473}
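
/*
 * Illustrative sketch (assumed names) of a single construct:
 *
 *     #pragma omp single
 *     { init_shared(); }
 *
 *     if (__kmpc_single(&loc, gtid)) {
 *         init_shared();
 *         __kmpc_end_single(&loc, gtid);
 *     }
 *     __kmpc_barrier(&loc, gtid);   // omitted when the construct has nowait
 *
 * As noted above, the single calls themselves contain no barrier; the compiler
 * inserts the explicit barrier when one is required.
 */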
1474
1475/*!
1476@ingroup WORK_SHARING
1477@param loc Source location
1478@param global_tid Global thread id
1479
1480Mark the end of a statically scheduled loop.
1481*/
1482void
1483__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1484{
1485 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1486
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001487#if OMPT_SUPPORT && OMPT_TRACE
1488 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1489 kmp_team_t *team = this_thr -> th.th_team;
1490 int tid = __kmp_tid_from_gtid( global_tid );
1491
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001492 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001493 ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
1494 ompt_callbacks.ompt_callback(ompt_event_loop_end)(
1495 team->t.ompt_team_info.parallel_id,
1496 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1497 }
1498#endif
1499
Jim Cownie5e8470a2013-09-27 10:38:44 +00001500 if ( __kmp_env_consistency_check )
1501 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1502}
1503
1504/*
1505 * User routines which take C-style arguments (call by value)
1506 * different from the Fortran equivalent routines
1507 */
1508
1509void
1510ompc_set_num_threads( int arg )
1511{
1512// !!!!! TODO: check the per-task binding
1513 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1514}
1515
1516void
1517ompc_set_dynamic( int flag )
1518{
1519 kmp_info_t *thread;
1520
1521 /* For the thread-private implementation of the internal controls */
1522 thread = __kmp_entry_thread();
1523
1524 __kmp_save_internal_controls( thread );
1525
1526 set__dynamic( thread, flag ? TRUE : FALSE );
1527}
1528
1529void
1530ompc_set_nested( int flag )
1531{
1532 kmp_info_t *thread;
1533
1534 /* For the thread-private internal controls implementation */
1535 thread = __kmp_entry_thread();
1536
1537 __kmp_save_internal_controls( thread );
1538
1539 set__nested( thread, flag ? TRUE : FALSE );
1540}
1541
Jim Cownie5e8470a2013-09-27 10:38:44 +00001542void
1543ompc_set_max_active_levels( int max_active_levels )
1544{
1545 /* TO DO */
1546 /* we want per-task implementation of this internal control */
1547
1548 /* For the per-thread internal controls implementation */
1549 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1550}
1551
1552void
1553ompc_set_schedule( omp_sched_t kind, int modifier )
1554{
1555// !!!!! TODO: check the per-task binding
1556 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1557}
1558
1559int
1560ompc_get_ancestor_thread_num( int level )
1561{
1562 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1563}
1564
1565int
1566ompc_get_team_size( int level )
1567{
1568 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1569}
1570
Jim Cownie5e8470a2013-09-27 10:38:44 +00001571void
1572kmpc_set_stacksize( int arg )
1573{
1574 // __kmp_aux_set_stacksize initializes the library if needed
1575 __kmp_aux_set_stacksize( arg );
1576}
1577
1578void
1579kmpc_set_stacksize_s( size_t arg )
1580{
1581 // __kmp_aux_set_stacksize initializes the library if needed
1582 __kmp_aux_set_stacksize( arg );
1583}
1584
1585void
1586kmpc_set_blocktime( int arg )
1587{
1588 int gtid, tid;
1589 kmp_info_t *thread;
1590
1591 gtid = __kmp_entry_gtid();
1592 tid = __kmp_tid_from_gtid(gtid);
1593 thread = __kmp_thread_from_gtid(gtid);
1594
1595 __kmp_aux_set_blocktime( arg, thread, tid );
1596}
1597
1598void
1599kmpc_set_library( int arg )
1600{
1601 // __kmp_user_set_library initializes the library if needed
1602 __kmp_user_set_library( (enum library_type)arg );
1603}
1604
1605void
1606kmpc_set_defaults( char const * str )
1607{
1608 // __kmp_aux_set_defaults initializes the library if needed
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001609 __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001610}
1611
Jim Cownie5e8470a2013-09-27 10:38:44 +00001612int
1613kmpc_set_affinity_mask_proc( int proc, void **mask )
1614{
Alp Toker98758b02014-03-02 04:12:06 +00001615#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001616 return -1;
1617#else
1618 if ( ! TCR_4(__kmp_init_middle) ) {
1619 __kmp_middle_initialize();
1620 }
1621 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1622#endif
1623}
1624
1625int
1626kmpc_unset_affinity_mask_proc( int proc, void **mask )
1627{
Alp Toker98758b02014-03-02 04:12:06 +00001628#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001629 return -1;
1630#else
1631 if ( ! TCR_4(__kmp_init_middle) ) {
1632 __kmp_middle_initialize();
1633 }
1634 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1635#endif
1636}
1637
1638int
1639kmpc_get_affinity_mask_proc( int proc, void **mask )
1640{
Alp Toker98758b02014-03-02 04:12:06 +00001641#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001642 return -1;
1643#else
1644 if ( ! TCR_4(__kmp_init_middle) ) {
1645 __kmp_middle_initialize();
1646 }
1647 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1648#endif
1649}
1650
Jim Cownie5e8470a2013-09-27 10:38:44 +00001651
1652/* -------------------------------------------------------------------------- */
1653/*!
1654@ingroup THREADPRIVATE
1655@param loc source location information
1656@param gtid global thread number
1657@param cpy_size size of the cpy_data buffer
1658@param cpy_data pointer to data to be copied
1659@param cpy_func helper function to call for copying data
1660@param didit flag variable: 1=single thread; 0=not single thread
1661
1662__kmpc_copyprivate implements the interface for the private data broadcast needed for
1663the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1664All threads participating in the parallel region call this routine.
1665One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1666and all other threads should have that variable set to 0.
1667All threads pass a pointer to a data buffer (cpy_data) that they have built.
1668
1669The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1670clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1671race conditions, so the code generation for the single region should avoid generating a barrier
1672after the call to @ref __kmpc_copyprivate.
1673
1674The <tt>gtid</tt> parameter is the global thread id for the current thread.
1675The <tt>loc</tt> parameter is a pointer to source location information.
1676
1677Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1678to a team-private location, then the other threads will each call the function pointed to by
1679the parameter cpy_func, which carries out the copy using the cpy_data buffer.
1680
1681The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1682and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1683the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1684to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1685
1686The interface to cpy_func is as follows:
1687@code
1688void cpy_func( void *destination, void *source )
1689@endcode
1690where void *destination is the cpy_data pointer for the thread being copied to
1691and void *source is the cpy_data pointer for the thread being copied from.
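For example, if each thread's cpy_data points directly at a single <tt>int</tt> to be
broadcast, a minimal (hypothetical) helper could be:
@code
void copy_one_int( void *destination, void *source )
{
    // destination/source are the cpy_data buffers of the receiving/broadcasting threads
    *(int *)destination = *(int *)source;
}
@endcode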
1692*/
1693void
1694__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1695{
1696 void **data_ptr;
1697
1698 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1699
1700 KMP_MB();
1701
1702 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1703
1704 if ( __kmp_env_consistency_check ) {
1705 if ( loc == 0 ) {
1706 KMP_WARNING( ConstructIdentInvalid );
1707 }
1708 }
1709
1710 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1711
1712 if (didit) *data_ptr = cpy_data;
1713
1714 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001715#if USE_ITT_NOTIFY
1716 __kmp_threads[gtid]->th.th_ident = loc;
1717#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001718 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1719
1720 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1721
1722 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1723 /* Nesting checks are already handled by the single construct checks */
1724
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001725#if USE_ITT_NOTIFY
1726 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1727#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001728 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1729}
1730
1731/* -------------------------------------------------------------------------- */
1732
1733#define INIT_LOCK __kmp_init_user_lock_with_checks
1734#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1735#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1736#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1737#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1738#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1739#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1740#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1741#define TEST_LOCK __kmp_test_user_lock_with_checks
1742#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1743#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1744#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1745
1746
1747/*
1748 * TODO: Make check abort messages use location info & pass it
1749 * into with_checks routines
1750 */
1751
1752/* initialize the lock */
1753void
1754__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001755#if KMP_USE_DYNAMIC_LOCK
1756 KMP_DEBUG_ASSERT(__kmp_init_serial);
1757 if (__kmp_env_consistency_check && user_lock == NULL) {
1758 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1759 }
1760 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1761 DYNA_INIT_D_LOCK(user_lock, __kmp_user_lock_seq);
1762# if USE_ITT_BUILD
1763 __kmp_itt_lock_creating((kmp_user_lock_p)user_lock, NULL);
1764# endif
1765 } else {
1766 DYNA_INIT_I_LOCK(user_lock, __kmp_user_lock_seq);
1767 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1768 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1769# if USE_ITT_BUILD
1770 __kmp_itt_lock_creating(ilk->lock, loc);
1771# endif
1772 }
1773
1774#else // KMP_USE_DYNAMIC_LOCK
1775
Jim Cownie5e8470a2013-09-27 10:38:44 +00001776 static char const * const func = "omp_init_lock";
1777 kmp_user_lock_p lck;
1778 KMP_DEBUG_ASSERT( __kmp_init_serial );
1779
1780 if ( __kmp_env_consistency_check ) {
1781 if ( user_lock == NULL ) {
1782 KMP_FATAL( LockIsUninitialized, func );
1783 }
1784 }
1785
1786 KMP_CHECK_USER_LOCK_INIT();
1787
1788 if ( ( __kmp_user_lock_kind == lk_tas )
1789 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1790 lck = (kmp_user_lock_p)user_lock;
1791 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001792#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001793 else if ( ( __kmp_user_lock_kind == lk_futex )
1794 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1795 lck = (kmp_user_lock_p)user_lock;
1796 }
1797#endif
1798 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001799 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001800 }
1801 INIT_LOCK( lck );
1802 __kmp_set_user_lock_location( lck, loc );
1803
1804#if USE_ITT_BUILD
1805 __kmp_itt_lock_creating( lck );
1806#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001807
1808#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001809} // __kmpc_init_lock
1810
1811/* initialize the lock */
1812void
1813__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001814#if KMP_USE_DYNAMIC_LOCK
1815
1816 KMP_DEBUG_ASSERT(__kmp_init_serial);
1817 if (__kmp_env_consistency_check && user_lock == NULL) {
1818 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1819 }
1820 // Invoke init function after converting to nested version.
1821 kmp_dyna_lockseq_t nested_seq;
1822 switch (__kmp_user_lock_seq) {
1823 case lockseq_tas: nested_seq = lockseq_nested_tas; break;
1824#if DYNA_HAS_FUTEX
1825 case lockseq_futex: nested_seq = lockseq_nested_futex; break;
1826#endif
1827 case lockseq_ticket: nested_seq = lockseq_nested_ticket; break;
1828 case lockseq_queuing: nested_seq = lockseq_nested_queuing; break;
1829 case lockseq_drdpa: nested_seq = lockseq_nested_drdpa; break;
1830 default: nested_seq = lockseq_nested_queuing; break;
1831 // Use nested queuing lock for lock kinds without "nested" implementation.
1832 }
1833 DYNA_INIT_I_LOCK(user_lock, nested_seq);
1834 // All nested locks are indirect locks.
1835 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1836 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1837# if USE_ITT_BUILD
1838 __kmp_itt_lock_creating(ilk->lock, loc);
1839# endif
1840
1841#else // KMP_USE_DYNAMIC_LOCK
1842
Jim Cownie5e8470a2013-09-27 10:38:44 +00001843 static char const * const func = "omp_init_nest_lock";
1844 kmp_user_lock_p lck;
1845 KMP_DEBUG_ASSERT( __kmp_init_serial );
1846
1847 if ( __kmp_env_consistency_check ) {
1848 if ( user_lock == NULL ) {
1849 KMP_FATAL( LockIsUninitialized, func );
1850 }
1851 }
1852
1853 KMP_CHECK_USER_LOCK_INIT();
1854
1855 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1856 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1857 lck = (kmp_user_lock_p)user_lock;
1858 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001859#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001860 else if ( ( __kmp_user_lock_kind == lk_futex )
1861 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1862 <= OMP_NEST_LOCK_T_SIZE ) ) {
1863 lck = (kmp_user_lock_p)user_lock;
1864 }
1865#endif
1866 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001867 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001868 }
1869
1870 INIT_NESTED_LOCK( lck );
1871 __kmp_set_user_lock_location( lck, loc );
1872
1873#if USE_ITT_BUILD
1874 __kmp_itt_lock_creating( lck );
1875#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001876
1877#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001878} // __kmpc_init_nest_lock
1879
1880void
1881__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001882#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001883
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001884# if USE_ITT_BUILD
1885 kmp_user_lock_p lck;
1886 if (DYNA_EXTRACT_D_TAG(user_lock) == 0) {
1887 lck = ((kmp_indirect_lock_t *)DYNA_LOOKUP_I_LOCK(user_lock))->lock;
1888 } else {
1889 lck = (kmp_user_lock_p)user_lock;
1890 }
1891 __kmp_itt_lock_destroyed(lck);
1892# endif
1893 DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1894#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001895 kmp_user_lock_p lck;
1896
1897 if ( ( __kmp_user_lock_kind == lk_tas )
1898 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1899 lck = (kmp_user_lock_p)user_lock;
1900 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001901#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001902 else if ( ( __kmp_user_lock_kind == lk_futex )
1903 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1904 lck = (kmp_user_lock_p)user_lock;
1905 }
1906#endif
1907 else {
1908 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
1909 }
1910
1911#if USE_ITT_BUILD
1912 __kmp_itt_lock_destroyed( lck );
1913#endif /* USE_ITT_BUILD */
1914 DESTROY_LOCK( lck );
1915
1916 if ( ( __kmp_user_lock_kind == lk_tas )
1917 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1918 ;
1919 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001920#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001921 else if ( ( __kmp_user_lock_kind == lk_futex )
1922 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1923 ;
1924 }
1925#endif
1926 else {
1927 __kmp_user_lock_free( user_lock, gtid, lck );
1928 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001929#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001930} // __kmpc_destroy_lock
1931
1932/* destroy the lock */
1933void
1934__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001935#if KMP_USE_DYNAMIC_LOCK
1936
1937# if USE_ITT_BUILD
1938 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1939 __kmp_itt_lock_destroyed(ilk->lock);
1940# endif
1941 DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1942
1943#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001944
1945 kmp_user_lock_p lck;
1946
1947 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1948 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1949 lck = (kmp_user_lock_p)user_lock;
1950 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001951#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001952 else if ( ( __kmp_user_lock_kind == lk_futex )
1953 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1954 <= OMP_NEST_LOCK_T_SIZE ) ) {
1955 lck = (kmp_user_lock_p)user_lock;
1956 }
1957#endif
1958 else {
1959 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
1960 }
1961
1962#if USE_ITT_BUILD
1963 __kmp_itt_lock_destroyed( lck );
1964#endif /* USE_ITT_BUILD */
1965
1966 DESTROY_NESTED_LOCK( lck );
1967
1968 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1969 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1970 ;
1971 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001972#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001973 else if ( ( __kmp_user_lock_kind == lk_futex )
1974 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1975 <= OMP_NEST_LOCK_T_SIZE ) ) {
1976 ;
1977 }
1978#endif
1979 else {
1980 __kmp_user_lock_free( user_lock, gtid, lck );
1981 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001982#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001983} // __kmpc_destroy_nest_lock
1984
1985void
1986__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001987 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001988#if KMP_USE_DYNAMIC_LOCK
1989 int tag = DYNA_EXTRACT_D_TAG(user_lock);
1990# if USE_ITT_BUILD
1991 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
1992# endif
1993# if DYNA_USE_FAST_TAS
1994 if (tag == locktag_tas && !__kmp_env_consistency_check) {
1995 DYNA_ACQUIRE_TAS_LOCK(user_lock, gtid);
1996 } else
1997# elif DYNA_USE_FAST_FUTEX
1998 if (tag == locktag_futex && !__kmp_env_consistency_check) {
1999 DYNA_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2000 } else
2001# endif
2002 {
2003 __kmp_direct_set_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2004 }
2005# if USE_ITT_BUILD
2006 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2007# endif
2008
2009#else // KMP_USE_DYNAMIC_LOCK
2010
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011 kmp_user_lock_p lck;
2012
2013 if ( ( __kmp_user_lock_kind == lk_tas )
2014 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2015 lck = (kmp_user_lock_p)user_lock;
2016 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002017#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002018 else if ( ( __kmp_user_lock_kind == lk_futex )
2019 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2020 lck = (kmp_user_lock_p)user_lock;
2021 }
2022#endif
2023 else {
2024 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
2025 }
2026
2027#if USE_ITT_BUILD
2028 __kmp_itt_lock_acquiring( lck );
2029#endif /* USE_ITT_BUILD */
2030
2031 ACQUIRE_LOCK( lck, gtid );
2032
2033#if USE_ITT_BUILD
2034 __kmp_itt_lock_acquired( lck );
2035#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002037#endif // KMP_USE_DYNAMIC_LOCK
2038}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002039
2040void
2041__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002042#if KMP_USE_DYNAMIC_LOCK
2043
2044# if USE_ITT_BUILD
2045 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2046# endif
2047 DYNA_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2048# if USE_ITT_BUILD
2049 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2050#endif
2051
2052#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002053 kmp_user_lock_p lck;
2054
2055 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2056 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2057 lck = (kmp_user_lock_p)user_lock;
2058 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002059#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060 else if ( ( __kmp_user_lock_kind == lk_futex )
2061 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2062 <= OMP_NEST_LOCK_T_SIZE ) ) {
2063 lck = (kmp_user_lock_p)user_lock;
2064 }
2065#endif
2066 else {
2067 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2068 }
2069
2070#if USE_ITT_BUILD
2071 __kmp_itt_lock_acquiring( lck );
2072#endif /* USE_ITT_BUILD */
2073
2074 ACQUIRE_NESTED_LOCK( lck, gtid );
2075
2076#if USE_ITT_BUILD
2077 __kmp_itt_lock_acquired( lck );
2078#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002079#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002080}
2081
2082void
2083__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2084{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002085#if KMP_USE_DYNAMIC_LOCK
2086
2087 int tag = DYNA_EXTRACT_D_TAG(user_lock);
2088# if USE_ITT_BUILD
2089 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2090# endif
2091# if DYNA_USE_FAST_TAS
2092 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2093 DYNA_RELEASE_TAS_LOCK(user_lock, gtid);
2094 } else
2095# elif DYNA_USE_FAST_FUTEX
2096 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2097 DYNA_RELEASE_FUTEX_LOCK(user_lock, gtid);
2098 } else
2099# endif
2100 {
2101 __kmp_direct_unset_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2102 }
2103
2104#else // KMP_USE_DYNAMIC_LOCK
2105
Jim Cownie5e8470a2013-09-27 10:38:44 +00002106 kmp_user_lock_p lck;
2107
2108 /* Can't use serial interval since not block structured */
2109 /* release the lock */
2110
2111 if ( ( __kmp_user_lock_kind == lk_tas )
2112 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002113#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114 // "fast" path implemented to fix customer performance issue
2115#if USE_ITT_BUILD
2116 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2117#endif /* USE_ITT_BUILD */
2118 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2119 KMP_MB();
2120 return;
2121#else
2122 lck = (kmp_user_lock_p)user_lock;
2123#endif
2124 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002125#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002126 else if ( ( __kmp_user_lock_kind == lk_futex )
2127 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2128 lck = (kmp_user_lock_p)user_lock;
2129 }
2130#endif
2131 else {
2132 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2133 }
2134
2135#if USE_ITT_BUILD
2136 __kmp_itt_lock_releasing( lck );
2137#endif /* USE_ITT_BUILD */
2138
2139 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002140
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002141#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002142 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002143 ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2144 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2145 }
2146#endif
2147
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002148#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002149}
2150
2151/* release the lock */
2152void
2153__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2154{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002155#if KMP_USE_DYNAMIC_LOCK
2156
2157# if USE_ITT_BUILD
2158 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2159# endif
2160 DYNA_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2161
2162#else // KMP_USE_DYNAMIC_LOCK
2163
Jim Cownie5e8470a2013-09-27 10:38:44 +00002164 kmp_user_lock_p lck;
2165
2166 /* Can't use serial interval since not block structured */
2167
2168 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2169 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002170#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002171 // "fast" path implemented to fix customer performance issue
2172 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2173#if USE_ITT_BUILD
2174 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2175#endif /* USE_ITT_BUILD */
2176 if ( --(tl->lk.depth_locked) == 0 ) {
2177 TCW_4(tl->lk.poll, 0);
2178 }
2179 KMP_MB();
2180 return;
2181#else
2182 lck = (kmp_user_lock_p)user_lock;
2183#endif
2184 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002185#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002186 else if ( ( __kmp_user_lock_kind == lk_futex )
2187 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2188 <= OMP_NEST_LOCK_T_SIZE ) ) {
2189 lck = (kmp_user_lock_p)user_lock;
2190 }
2191#endif
2192 else {
2193 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2194 }
2195
2196#if USE_ITT_BUILD
2197 __kmp_itt_lock_releasing( lck );
2198#endif /* USE_ITT_BUILD */
2199
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002200 int release_status;
2201 release_status = RELEASE_NESTED_LOCK( lck, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002202#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002203 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002204 if (release_status == KMP_LOCK_RELEASED) {
2205 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2206 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2207 (uint64_t) lck);
2208 }
2209 } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2210 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2211 (uint64_t) lck);
2212 }
2213 }
2214#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002215
2216#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002217}
2218
2219/* try to acquire the lock */
2220int
2221__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2222{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002223 KMP_COUNT_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002224
2225#if KMP_USE_DYNAMIC_LOCK
2226 int rc;
2227 int tag = DYNA_EXTRACT_D_TAG(user_lock);
2228# if USE_ITT_BUILD
Jonathan Peyton81f9cd12015-05-22 22:37:22 +00002229 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002230# endif
2231# if DYNA_USE_FAST_TAS
2232 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2233 DYNA_TEST_TAS_LOCK(user_lock, gtid, rc);
2234 } else
2235# elif DYNA_USE_FAST_FUTEX
2236 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2237 DYNA_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2238 } else
2239# endif
2240 {
2241 rc = __kmp_direct_test_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2242 }
2243 if (rc) {
2244# if USE_ITT_BUILD
2245 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2246# endif
2247 return FTN_TRUE;
2248 } else {
2249# if USE_ITT_BUILD
2250 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2251# endif
2252 return FTN_FALSE;
2253 }
2254
2255#else // KMP_USE_DYNAMIC_LOCK
2256
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257 kmp_user_lock_p lck;
2258 int rc;
2259
2260 if ( ( __kmp_user_lock_kind == lk_tas )
2261 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2262 lck = (kmp_user_lock_p)user_lock;
2263 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002264#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002265 else if ( ( __kmp_user_lock_kind == lk_futex )
2266 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2267 lck = (kmp_user_lock_p)user_lock;
2268 }
2269#endif
2270 else {
2271 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2272 }
2273
2274#if USE_ITT_BUILD
2275 __kmp_itt_lock_acquiring( lck );
2276#endif /* USE_ITT_BUILD */
2277
2278 rc = TEST_LOCK( lck, gtid );
2279#if USE_ITT_BUILD
2280 if ( rc ) {
2281 __kmp_itt_lock_acquired( lck );
2282 } else {
2283 __kmp_itt_lock_cancelled( lck );
2284 }
2285#endif /* USE_ITT_BUILD */
2286 return ( rc ? FTN_TRUE : FTN_FALSE );
2287
2288 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002289
2290#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002291}
2292
2293/* try to acquire the lock */
2294int
2295__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2296{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002297#if KMP_USE_DYNAMIC_LOCK
2298 int rc;
2299# if USE_ITT_BUILD
2300 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2301# endif
2302 rc = DYNA_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
2303# if USE_ITT_BUILD
2304 if (rc) {
2305 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2306 } else {
2307 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2308 }
2309# endif
2310 return rc;
2311
2312#else // KMP_USE_DYNAMIC_LOCK
2313
Jim Cownie5e8470a2013-09-27 10:38:44 +00002314 kmp_user_lock_p lck;
2315 int rc;
2316
2317 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2318 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2319 lck = (kmp_user_lock_p)user_lock;
2320 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002321#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002322 else if ( ( __kmp_user_lock_kind == lk_futex )
2323 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2324 <= OMP_NEST_LOCK_T_SIZE ) ) {
2325 lck = (kmp_user_lock_p)user_lock;
2326 }
2327#endif
2328 else {
2329 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2330 }
2331
2332#if USE_ITT_BUILD
2333 __kmp_itt_lock_acquiring( lck );
2334#endif /* USE_ITT_BUILD */
2335
2336 rc = TEST_NESTED_LOCK( lck, gtid );
2337#if USE_ITT_BUILD
2338 if ( rc ) {
2339 __kmp_itt_lock_acquired( lck );
2340 } else {
2341 __kmp_itt_lock_cancelled( lck );
2342 }
2343#endif /* USE_ITT_BUILD */
2344 return rc;
2345
2346 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002347
2348#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002349}
2350
2351
2352/*--------------------------------------------------------------------------------------------------------------------*/
2353
2354/*
2355 * Interface to fast scalable reduce methods routines
2356 */
2357
2358// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions;
2359 // an alternative: re-determine the method in the __kmpc_end_reduce* functions (a new prototype would be required then)
2360// AT: which solution is better?
2361#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2362 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2363
2364#define __KMP_GET_REDUCTION_METHOD(gtid) \
2365 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2366
2367// description of the packed_reduction_method variable: look at the macros in kmp.h
2368
2369
2370// used in a critical section reduce block
2371static __forceinline void
2372__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2373
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002374 // this lock was visible to a customer and to the threading profile tool as a serial overhead span
Jim Cownie5e8470a2013-09-27 10:38:44 +00002375 // (although it's used for an internal purpose only)
2376 // why was it visible in the previous implementation?
2377 // should we keep it visible in the new reduce block?
2378 kmp_user_lock_p lck;
2379
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002380#if KMP_USE_DYNAMIC_LOCK
2381
2382 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2383 lck = (kmp_user_lock_p)crit;
2384 if (*((kmp_dyna_lock_t *)lck) == 0) {
2385 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
2386 }
2387 KMP_DEBUG_ASSERT(lck != NULL);
2388 if (__kmp_env_consistency_check) {
2389 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2390 }
2391 DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
2392 } else {
2393 kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
2394 KMP_DEBUG_ASSERT(ilk != NULL);
2395 if (__kmp_env_consistency_check) {
2396 __kmp_push_sync(global_tid, ct_critical, loc, ilk->lock, __kmp_user_lock_seq);
2397 }
2398 DYNA_I_LOCK_FUNC(ilk, set)(ilk->lock, global_tid);
2399 }
2400
2401#else // KMP_USE_DYNAMIC_LOCK
2402
Jim Cownie5e8470a2013-09-27 10:38:44 +00002403 // We know that the fast reduction code is only emitted by Intel compilers
2404 // with 32 byte critical sections. If there isn't enough space, then we
2405 // have to use a pointer.
2406 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2407 lck = (kmp_user_lock_p)crit;
2408 }
2409 else {
2410 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2411 }
2412 KMP_DEBUG_ASSERT( lck != NULL );
2413
2414 if ( __kmp_env_consistency_check )
2415 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2416
2417 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002418
2419#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002420}
2421
2422// used in a critical section reduce block
2423static __forceinline void
2424__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2425
2426 kmp_user_lock_p lck;
2427
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002428#if KMP_USE_DYNAMIC_LOCK
2429
2430 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2431 lck = (kmp_user_lock_p)crit;
2432 if (__kmp_env_consistency_check)
2433 __kmp_pop_sync(global_tid, ct_critical, loc);
2434 DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
2435 } else {
2436 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2437 if (__kmp_env_consistency_check)
2438 __kmp_pop_sync(global_tid, ct_critical, loc);
2439 DYNA_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
2440 }
2441
2442#else // KMP_USE_DYNAMIC_LOCK
2443
Jim Cownie5e8470a2013-09-27 10:38:44 +00002444 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2445 // sections. If there isn't enough space, then we have to use a pointer.
2446 if ( __kmp_base_user_lock_size > 32 ) {
2447 lck = *( (kmp_user_lock_p *) crit );
2448 KMP_ASSERT( lck != NULL );
2449 } else {
2450 lck = (kmp_user_lock_p) crit;
2451 }
2452
2453 if ( __kmp_env_consistency_check )
2454 __kmp_pop_sync( global_tid, ct_critical, loc );
2455
2456 __kmp_release_user_lock_with_checks( lck, global_tid );
2457
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002458#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002459} // __kmp_end_critical_section_reduce_block
2460
2461
2462/* 2.a.i. Reduce Block without a terminating barrier */
2463/*!
2464@ingroup SYNCHRONIZATION
2465@param loc source location information
2466@param global_tid global thread number
2467@param num_vars number of items (variables) to be reduced
2468@param reduce_size size of data in bytes to be reduced
2469@param reduce_data pointer to data to be reduced
2470@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2471@param lck pointer to the unique lock data structure
2472@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2473
2474The nowait version is used for a reduce clause with the nowait argument.
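For illustration, a minimal (hypothetical) reduce_func for a single <tt>int</tt> combined
with '+' could be:
@code
void add_ints( void *lhs_data, void *rhs_data )
{
    *(int *)lhs_data += *(int *)rhs_data;  // the result of the reduction is returned in lhs_data
}
@endcode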
2475*/
2476kmp_int32
2477__kmpc_reduce_nowait(
2478 ident_t *loc, kmp_int32 global_tid,
2479 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2480 kmp_critical_name *lck ) {
2481
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002482 KMP_COUNT_BLOCK(REDUCE_nowait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002483 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002484 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002485#if OMP_40_ENABLED
2486 kmp_team_t *team;
2487 kmp_info_t *th;
2488 int teams_swapped = 0, task_state;
2489#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002490 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2491
2492 // why do we need this initialization here at all?
2493 // The reduction clause cannot be used as a stand-alone directive.
2494
2495 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2496 // possible detection of false-positive race by the threadchecker ???
2497 if( ! TCR_4( __kmp_init_parallel ) )
2498 __kmp_parallel_initialize();
2499
2500 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002501#if KMP_USE_DYNAMIC_LOCK
2502 if ( __kmp_env_consistency_check )
2503 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2504#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002505 if ( __kmp_env_consistency_check )
2506 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002507#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002508
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002509#if OMP_40_ENABLED
2510 th = __kmp_thread_from_gtid(global_tid);
2511 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2512 team = th->th.th_team;
2513 if( team->t.t_level == th->th.th_teams_level ) {
2514 // this is reduction at teams construct
2515 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2516 // Let's swap teams temporarily for the reduction barrier
2517 teams_swapped = 1;
2518 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2519 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002520 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002521 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002522 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002523 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002524 }
2525 }
2526#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002527
2528 // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable
2529 // the variable should be either a construct-specific or thread-specific property, not a team specific property
2530 // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct)
2531 // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
2532 // (if both construct-specific and team-specific variables were shared, then unnecessary extra syncs would be needed)
2533 // a thread-specific variable is better regarding two issues above (next construct and extra syncs)
2534 // a thread-specific "th_local.reduction_method" variable is used currently
2535 // each thread executes the 'determine' and 'set' lines (no need to execute by one thread, to avoid unnecessary extra syncs)
2536
2537 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2538 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2539
2540 if( packed_reduction_method == critical_reduce_block ) {
2541
2542 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2543 retval = 1;
2544
2545 } else if( packed_reduction_method == empty_reduce_block ) {
2546
2547 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2548 retval = 1;
2549
2550 } else if( packed_reduction_method == atomic_reduce_block ) {
2551
2552 retval = 2;
2553
2554 // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2555 // (it's not quite good, because the checking block has been closed by this 'pop',
2556 // but atomic operation has not been executed yet, will be executed slightly later, literally on next instruction)
2557 if ( __kmp_env_consistency_check )
2558 __kmp_pop_sync( global_tid, ct_reduce, loc );
2559
2560 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2561
2562 //AT: performance issue: a real barrier here
2563 //AT: (if the master goes slow, the other threads are blocked here waiting for the master to come and release them)
2564 //AT: (this is not what a customer might expect when specifying the NOWAIT clause)
2565 //AT: (specifying NOWAIT won't improve performance and may be confusing to a customer)
2566 //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
2567 // and be more in line with the intent of NOWAIT
2568 //AT: TO DO: run the EPCC test and compare times
2569
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002570 // this barrier should be invisible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002571 // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002572#if USE_ITT_NOTIFY
2573 __kmp_threads[global_tid]->th.th_ident = loc;
2574#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002575 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2576 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2577
2578 // all other workers except master should do this pop here
2579 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
2580 if ( __kmp_env_consistency_check ) {
2581 if( retval == 0 ) {
2582 __kmp_pop_sync( global_tid, ct_reduce, loc );
2583 }
2584 }
2585
2586 } else {
2587
2588 // should never reach this block
2589 KMP_ASSERT( 0 ); // "unexpected method"
2590
2591 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002592#if OMP_40_ENABLED
2593 if( teams_swapped ) {
2594 // Restore thread structure
2595 th->th.th_info.ds.ds_tid = 0;
2596 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002597 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002598 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002599 th->th.th_task_state = task_state;
2600 }
2601#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002602 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2603
2604 return retval;
2605}
2606
2607/*!
2608@ingroup SYNCHRONIZATION
2609@param loc source location information
2610@param global_tid global thread id.
2611@param lck pointer to the unique lock data structure
2612
2613Finish the execution of a reduce nowait.
2614*/
2615void
2616__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2617
2618 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2619
2620 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2621
2622 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2623
2624 if( packed_reduction_method == critical_reduce_block ) {
2625
2626 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2627
2628 } else if( packed_reduction_method == empty_reduce_block ) {
2629
2630 // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2631
2632 } else if( packed_reduction_method == atomic_reduce_block ) {
2633
2634 // neither master nor other workers should get here
2635 // (code gen does not generate this call in case 2: atomic reduce block)
2636 // actually it's better to remove this elseif at all;
2637 // after removal this value will checked by the 'else' and will assert
2638
2639 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2640
2641 // only master gets here
2642
2643 } else {
2644
2645 // should never reach this block
2646 KMP_ASSERT( 0 ); // "unexpected method"
2647
2648 }
2649
2650 if ( __kmp_env_consistency_check )
2651 __kmp_pop_sync( global_tid, ct_reduce, loc );
2652
2653 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2654
2655 return;
2656}
2657
2658/* 2.a.ii. Reduce Block with a terminating barrier */
2659
2660/*!
2661@ingroup SYNCHRONIZATION
2662@param loc source location information
2663@param global_tid global thread number
2664@param num_vars number of items (variables) to be reduced
2665@param reduce_size size of data in bytes to be reduced
2666@param reduce_data pointer to data to be reduced
2667@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2668@param lck pointer to the unique lock data structure
2669@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2670
2671A blocking reduce that includes an implicit barrier.
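A purely illustrative sketch of how a caller might act on the result (actual compiler code
generation differs in detail; <tt>add_ints</tt>, <tt>priv</tt>, <tt>global</tt> and
<tt>crit</tt> are hypothetical names):
@code
int res = __kmpc_reduce( loc, gtid, 1, sizeof(kmp_int32), &priv, add_ints, &crit );
if ( res == 1 ) {         // this thread combines the partial results serially
    global += priv;
    __kmpc_end_reduce( loc, gtid, &crit );
} else if ( res == 2 ) {  // every thread contributes with an atomic update
    __kmpc_atomic_fixed4_add( loc, gtid, &global, priv );
    __kmpc_end_reduce( loc, gtid, &crit );  // assumed here so the terminating barrier is executed
}
// res == 0: nothing more to do for this thread
@endcode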
2672*/
2673kmp_int32
2674__kmpc_reduce(
2675 ident_t *loc, kmp_int32 global_tid,
2676 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2677 void (*reduce_func)(void *lhs_data, void *rhs_data),
2678 kmp_critical_name *lck )
2679{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002680 KMP_COUNT_BLOCK(REDUCE_wait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002681 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002682 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2683
2684 KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2685
2686 // why do we need this initialization here at all?
2687 // The reduction clause cannot be used as a stand-alone directive.
2688
2689 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2690 // possible detection of false-positive race by the threadchecker ???
2691 if( ! TCR_4( __kmp_init_parallel ) )
2692 __kmp_parallel_initialize();
2693
2694 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002695#if KMP_USE_DYNAMIC_LOCK
2696 if ( __kmp_env_consistency_check )
2697 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2698#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002699 if ( __kmp_env_consistency_check )
2700 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002701#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002702
Jim Cownie5e8470a2013-09-27 10:38:44 +00002703 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2704 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2705
2706 if( packed_reduction_method == critical_reduce_block ) {
2707
2708 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2709 retval = 1;
2710
2711 } else if( packed_reduction_method == empty_reduce_block ) {
2712
2713 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2714 retval = 1;
2715
2716 } else if( packed_reduction_method == atomic_reduce_block ) {
2717
2718 retval = 2;
2719
2720 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2721
2722 //case tree_reduce_block:
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002723 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002724 // (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002725#if USE_ITT_NOTIFY
2726 __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2727#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002728 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2729 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2730
2731 // all other workers except master should do this pop here
2732 // ( none of other workers except master will enter __kmpc_end_reduce() )
2733 if ( __kmp_env_consistency_check ) {
2734 if( retval == 0 ) { // 0: all other workers; 1: master
2735 __kmp_pop_sync( global_tid, ct_reduce, loc );
2736 }
2737 }
2738
2739 } else {
2740
2741 // should never reach this block
2742 KMP_ASSERT( 0 ); // "unexpected method"
2743
2744 }
2745
2746 KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2747
2748 return retval;
2749}
2750
2751/*!
2752@ingroup SYNCHRONIZATION
2753@param loc source location information
2754@param global_tid global thread id.
2755@param lck pointer to the unique lock data structure
2756
2757Finish the execution of a blocking reduce.
2758The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
2759*/
2760void
2761__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2762
2763 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2764
2765 KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2766
2767 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2768
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002769 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002770 // (it's a terminating barrier on constructs if NOWAIT not specified)
2771
2772 if( packed_reduction_method == critical_reduce_block ) {
2773
2774 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2775
2776 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002777#if USE_ITT_NOTIFY
2778 __kmp_threads[global_tid]->th.th_ident = loc;
2779#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002780 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2781
2782 } else if( packed_reduction_method == empty_reduce_block ) {
2783
2784 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2785
2786 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002787#if USE_ITT_NOTIFY
2788 __kmp_threads[global_tid]->th.th_ident = loc;
2789#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002790 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2791
2792 } else if( packed_reduction_method == atomic_reduce_block ) {
2793
2794 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002795#if USE_ITT_NOTIFY
2796 __kmp_threads[global_tid]->th.th_ident = loc;
2797#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002798 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2799
2800 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2801
2802 // only master executes here (master releases all other workers)
2803 __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2804
2805 } else {
2806
2807 // should never reach this block
2808 KMP_ASSERT( 0 ); // "unexpected method"
2809
2810 }
2811
2812 if ( __kmp_env_consistency_check )
2813 __kmp_pop_sync( global_tid, ct_reduce, loc );
2814
2815 KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2816
2817 return;
2818}
2819
2820#undef __KMP_GET_REDUCTION_METHOD
2821#undef __KMP_SET_REDUCTION_METHOD
2822
2823/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
2824
2825kmp_uint64
2826__kmpc_get_taskid() {
2827
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002828 kmp_int32 gtid;
2829 kmp_info_t * thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002830
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002831 gtid = __kmp_get_gtid();
2832 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002833 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002834 }; // if
2835 thread = __kmp_thread_from_gtid( gtid );
2836 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002837
2838} // __kmpc_get_taskid
2839
2840
2841kmp_uint64
2842__kmpc_get_parent_taskid() {
2843
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002844 kmp_int32 gtid;
2845 kmp_info_t * thread;
2846 kmp_taskdata_t * parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002847
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002848 gtid = __kmp_get_gtid();
2849 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002850 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002851 }; // if
2852 thread = __kmp_thread_from_gtid( gtid );
2853 parent_task = thread->th.th_current_task->td_parent;
2854 return ( parent_task == NULL ? 0 : parent_task->td_task_id );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002855
2856} // __kmpc_get_parent_taskid
2857
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00002858void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002859{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002860 if ( ! __kmp_init_serial ) {
2861 __kmp_serial_initialize();
2862 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00002863 __kmp_place_num_sockets = nS;
2864 __kmp_place_socket_offset = sO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002865 __kmp_place_num_cores = nC;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00002866 __kmp_place_core_offset = cO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002867 __kmp_place_num_threads_per_core = nT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002868}
2869
2870// end of file //
2871