/*
 * kmp_csupport.c -- kfront linkage support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "omp.h"        /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_error.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* flags will be used in future, e.g., to implement */
/* openmp_strict library restrictions               */

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 *
 */
void
__kmpc_begin(ident_t *loc, kmp_int32 flags)
{
    // By default __kmp_ignore_mppbeg() returns TRUE.
    if (__kmp_ignore_mppbeg() == FALSE) {
        __kmp_internal_begin();

        KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
    }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called will not
 * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
 */
void
__kmpc_end(ident_t *loc)
{
    // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end() call a no-op.
    // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
    // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
    // will unregister this root (which can cause library shutdown).
    if (__kmp_ignore_mppend() == FALSE) {
        KC_TRACE( 10, ("__kmpc_end: called\n" ) );
        KA_TRACE( 30, ("__kmpc_end\n" ));

        __kmp_internal_end_thread( -1 );
    }
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).

*/
kmp_int32
__kmpc_global_thread_num(ident_t *loc)
{
    kmp_int32 gtid = __kmp_entry_gtid();

    KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );

    return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime.

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime. That is
not a number that can be determined by any OpenMP standard calls, since the library may be
called from more than one non-OpenMP thread, and this reflects the total over all such calls.
Similarly, the runtime maintains underlying threads even when they are not active (since the cost
of creating and destroying OS threads is high); this call counts all such threads even if they are not
waiting for work.
*/
kmp_int32
__kmpc_global_num_threads(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );

    return TCR_4(__kmp_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel construct.

*/
kmp_int32
__kmpc_bound_thread_num(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
    return __kmp_tid_from_gtid( __kmp_entry_gtid() );
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32
__kmpc_bound_num_threads(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );

    return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
}

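/*
 * Editor's illustration (not part of the runtime): how the query entry points
 * above differ when called from inside a parallel region. `loc' stands for a
 * compiler-emitted ident_t describing the call site.
 *
 *   kmp_int32 gtid  = __kmpc_global_thread_num(&loc);  // unique across all threads the runtime has created
 *   kmp_int32 tid   = __kmpc_bound_thread_num(&loc);   // == omp_get_thread_num() in the innermost team
 *   kmp_int32 nth   = __kmpc_bound_num_threads(&loc);  // == omp_get_num_threads() in the innermost team
 *   kmp_int32 total = __kmpc_global_num_threads(&loc); // all threads the runtime currently manages
 */
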
/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32
__kmpc_ok_to_fork(ident_t *loc)
{
#ifndef KMP_DEBUG

    return TRUE;

#else

    const char *semi2;
    const char *semi3;
    int line_no;

    if (__kmp_par_range == 0) {
        return TRUE;
    }
    semi2 = loc->psource;
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2 + 1, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    if (__kmp_par_range_filename[0]) {
        const char *name = semi2 - 1;
        while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
            name--;
        }
        if ((*name == '/') || (*name == ';')) {
            name++;
        }
        if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
            return __kmp_par_range < 0;
        }
    }
    semi3 = strchr(semi2 + 1, ';');
    if (__kmp_par_range_routine[0]) {
        if ((semi3 != NULL) && (semi3 > semi2)
            && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
            return __kmp_par_range < 0;
        }
    }
    if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
        if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
            return __kmp_par_range > 0;
        }
        return __kmp_par_range < 0;
    }
    return TRUE;

#endif /* KMP_DEBUG */

}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if not.
*/
kmp_int32
__kmpc_in_parallel( ident_t *loc )
{
    return __kmp_entry_thread() -> th.th_root -> r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void
__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
      global_tid, num_threads ) );

    __kmp_push_num_threads( loc, global_tid, num_threads );
}

void
__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
{
    KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );

    /* the num_threads are automatically popped */
}

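/*
 * Editor's sketch of how a compiler might use the call above (illustrative
 * names, not the one true lowering): a parallel construct with a `num_threads'
 * clause pushes the requested value immediately before the fork (see
 * __kmpc_fork_call below); the value applies only to that fork and is popped
 * automatically.
 *
 *   // #pragma omp parallel num_threads(4)
 *   __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
 *   __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_region);
 */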

#if OMP_40_ENABLED

void
__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
{
    KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
      global_tid, proc_bind ) );

    __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
}

#endif /* OMP_40_ENABLED */


/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void
__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
    int inParallel = __kmpc_in_parallel(loc);
    if (inParallel)
    {
        KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
    }
    else
    {
        KMP_STOP_EXPLICIT_TIMER(OMP_serial);
        KMP_COUNT_BLOCK(OMP_PARALLEL);
    }
#endif

    // maybe to save thr_state is enough here
    {
        va_list ap;
        va_start( ap, microtask );

#if OMPT_SUPPORT
        int tid = __kmp_tid_from_gtid( gtid );
        kmp_info_t *master_th = __kmp_threads[ gtid ];
        kmp_team_t *parent_team = master_th->th.th_team;
        if (ompt_enabled) {
            parent_team->t.t_implicit_task_taskdata[tid].
                ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
        }
#endif

#if INCLUDE_SSC_MARKS
        SSC_MARK_FORKING();
#endif
        __kmp_fork_call( loc, gtid, fork_context_intel,
                argc,
#if OMPT_SUPPORT
                VOLATILE_CAST(void *) microtask,      // "unwrapped" task
#endif
                VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                &ap
#else
                ap
#endif
                );
#if INCLUDE_SSC_MARKS
        SSC_MARK_JOINING();
#endif
        __kmp_join_call( loc, gtid
#if OMPT_SUPPORT
            , fork_context_intel
#endif
        );

        va_end( ap );

#if OMPT_SUPPORT
        if (ompt_enabled) {
            parent_team->t.t_implicit_task_taskdata[tid].
                ompt_task_info.frame.reenter_runtime_frame = 0;
        }
#endif
    }
#if (KMP_STATS_ENABLED)
    if (!inParallel)
        KMP_START_EXPLICIT_TIMER(OMP_serial);
#endif
}

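/*
 * Editor's illustration of the expected calling convention (a sketch under the
 * usual compiler lowering; `loc', `outlined_region' and `sum' are hypothetical
 * names): the compiler outlines the body of the parallel region into a
 * microtask whose first two parameters are the global and bound thread ids,
 * followed by one pointer per shared variable passed in the ellipsis.
 *
 *   static void outlined_region(kmp_int32 *gtid, kmp_int32 *btid, int *sum) {
 *       // body of the parallel region; every thread sees the same &sum that
 *       // the encountering thread passed to __kmpc_fork_call().
 *   }
 *
 *   int sum = 0;
 *   __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_region, &sum);
 *   // On return the fork/join is complete and `sum' is visible to the caller.
 */
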
#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void
__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
      global_tid, num_teams, num_threads ) );

    __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void
__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    int gtid = __kmp_entry_gtid();
    kmp_info_t *this_thr = __kmp_threads[ gtid ];
    va_list ap;
    va_start( ap, microtask );

    KMP_COUNT_BLOCK(OMP_TEAMS);

    // remember teams entry point and nesting level
    this_thr->th.th_teams_microtask = microtask;
    this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
    kmp_team_t *parent_team = this_thr->th.th_team;
    int tid = __kmp_tid_from_gtid( gtid );
    if (ompt_enabled) {
        parent_team->t.t_implicit_task_taskdata[tid].
            ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
    }
#endif

    // check if __kmpc_push_num_teams called, set default number of teams otherwise
    if ( this_thr->th.th_teams_size.nteams == 0 ) {
        __kmp_push_num_teams( loc, gtid, 0, 0 );
    }
    KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

    __kmp_fork_call( loc, gtid, fork_context_intel,
            argc,
#if OMPT_SUPPORT
            VOLATILE_CAST(void *) microtask,               // "unwrapped" task
#endif
            VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
            VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            &ap
#else
            ap
#endif
            );
    __kmp_join_call( loc, gtid
#if OMPT_SUPPORT
        , fork_context_intel
#endif
    );

#if OMPT_SUPPORT
    if (ompt_enabled) {
        parent_team->t.t_implicit_task_taskdata[tid].
            ompt_task_info.frame.reenter_runtime_frame = NULL;
    }
#endif

    this_thr->th.th_teams_microtask = NULL;
    this_thr->th.th_teams_level = 0;
    *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
    va_end( ap );
}
#endif /* OMP_40_ENABLED */

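/*
 * Editor's sketch of a teams construct lowering (illustrative names, assumed
 * compiler behaviour):
 *
 *   // #pragma omp teams num_teams(2) thread_limit(8)
 *   __kmpc_push_num_teams(&loc, gtid, 2, 8);
 *   __kmpc_fork_teams(&loc, 1, (kmpc_micro)outlined_teams_region, &shared_var);
 *
 * Without the clauses the push is omitted and __kmpc_fork_teams() falls back to
 * the defaults chosen by __kmp_push_num_teams(loc, gtid, 0, 0) above.
 */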

//
// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
//
int
__kmpc_invoke_task_func( int gtid )
{
    return __kmp_invoke_task_func( gtid );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
                                                 * kmp_fork_call since the tasks to be done are similar in each case.
                                                 */
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void
__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_internal_control_t *top;
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );

    /* skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    // Not autopar code
    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    #if OMP_41_ENABLED
    kmp_task_team_t * task_team = this_thr->th.th_task_team;

    // we need to wait for the proxy tasks before finishing the thread
    if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
        __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here?
    #endif

    KMP_MB();
    KMP_DEBUG_ASSERT( serial_team );
    KMP_ASSERT( serial_team -> t.t_serialized );
    KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
    KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );

    /* If necessary, pop the internal control stack values and replace the team values */
    top = serial_team -> t.t_control_stack_top;
    if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
        copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
        serial_team -> t.t_control_stack_top = top -> next;
        __kmp_free(top);
    }

    //if( serial_team -> t.t_serialized > 1 )
    serial_team -> t.t_level--;

    /* pop dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
    {
        dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer =
            serial_team->t.t_dispatch->th_disp_buffer->next;
        __kmp_free( disp_buffer );
    }

    -- serial_team -> t.t_serialized;
    if ( serial_team -> t.t_serialized == 0 ) {

        /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
        if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
            __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
        }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

        this_thr -> th.th_team           = serial_team -> t.t_parent;
        this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;

        /* restore values cached in the thread */
        this_thr -> th.th_team_nproc     = serial_team -> t.t_parent -> t.t_nproc;      /* JPH */
        this_thr -> th.th_team_master    = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
        this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;

        /* TODO the below shouldn't need to be adjusted for serialized teams */
        this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
            t.t_dispatch[ serial_team -> t.t_master_tid ];

        __kmp_pop_current_task_from_thread( this_thr );

        KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
        this_thr -> th.th_current_task -> td_flags.executing = 1;

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            // Copy the task team from the new child / old parent team to the thread.
            this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
                            global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
        }
    } else {
        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
                            global_tid, serial_team, serial_team -> t.t_serialized ) );
        }
    }

#if USE_ITT_BUILD
    kmp_uint64 cur_time = 0;
#if USE_ITT_NOTIFY
    if ( __itt_get_timestamp_ptr ) {
        cur_time = __itt_get_timestamp();
    }
#endif /* USE_ITT_NOTIFY */
    if ( this_thr->th.th_team->t.t_level == 0
#if OMP_40_ENABLED
        && this_thr->th.th_teams_microtask == NULL
#endif
    ) {
        // Report the barrier
        this_thr->th.th_ident = loc;
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
            ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
        {
            __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
                                    cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
            if ( __kmp_forkjoin_frames_mode == 3 )
                // Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
                __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
                                        cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
        } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
            ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
            // Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
            __kmp_itt_region_joined( global_tid, 1 );
    }
#endif /* USE_ITT_BUILD */

    if ( __kmp_env_consistency_check )
        __kmp_pop_parallel( global_tid, NULL );
}

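/*
 * Editor's sketch of the conditional-parallel lowering the two entry points
 * above support (names such as `outlined_region' are illustrative):
 *
 *   // #pragma omp parallel if (cond)
 *   if (cond) {
 *       __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_region);
 *   } else {
 *       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *       kmp_int32 zero = 0;
 *       __kmpc_serialized_parallel(&loc, gtid);
 *       outlined_region(&gtid, &zero);   // region runs on the encountering thread
 *       __kmpc_end_serialized_parallel(&loc, gtid);
 *   }
 */
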
/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void
__kmpc_flush(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_flush: called\n" ) );

    /* need explicit __mf() here since use volatile instead in library */
    KMP_MB();       /* Flush all pending memory write invalidates. */

    #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
        #if KMP_MIC
            // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
            // We shouldn't need it, though, since the ABI rules require that
            // * If the compiler generates NGO stores it also generates the fence
            // * If users hand-code NGO stores they should insert the fence
            // therefore no incomplete unordered stores should be visible.
        #else
            // C74404
            // This is to address non-temporal store instructions (sfence needed).
            // The clflush instruction is also addressed (mfence needed).
            // Probably the non-temporal load movntdqa instruction should also be addressed.
            // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
            if ( ! __kmp_cpuinfo.initialized ) {
                __kmp_query_cpuid( & __kmp_cpuinfo );
            }; // if
            if ( ! __kmp_cpuinfo.sse2 ) {
                // CPU cannot execute SSE2 instructions.
            } else {
                #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
                _mm_mfence();
                #else
                __sync_synchronize();
                #endif // KMP_COMPILER_ICC
            }; // if
        #endif // KMP_MIC
    #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        // Nothing to see here move along
    #elif KMP_ARCH_PPC64
        // Nothing needed here (we have a real MB above).
        #if KMP_OS_CNK
            // The flushing thread needs to yield here; this prevents a
            // busy-waiting thread from saturating the pipeline. flush is
            // often used in loops like this:
            //   while (!flag) {
            //     #pragma omp flush(flag)
            //   }
            // and adding the yield here is good for at least a 10x speedup
            // when running >2 threads per core (on the NAS LU benchmark).
            __kmp_yield(TRUE);
        #endif
    #else
        #error Unknown or unsupported architecture
    #endif

}

/* -------------------------------------------------------------------------- */

/* -------------------------------------------------------------------------- */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void
__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_BARRIER);
    KMP_TIME_BLOCK(OMP_barrier);
    KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {
        if ( loc == 0 ) {
            KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
        }; // if

        __kmp_check_barrier( global_tid, ct_barrier, loc );
    }

    __kmp_threads[ global_tid ]->th.th_ident = loc;
    // TODO: explicit barrier_wait_id:
    //   this function is called when 'barrier' directive is present or
    //   implicit barrier at the end of a worksharing construct.
    // 1) better to add a per-thread barrier counter to a thread data structure
    // 2) set to 0 when a new team is created
    // 4) no sync is required

    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32
__kmpc_master(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_MASTER);
    int status = 0;

    KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    if( KMP_MASTER_GTID( global_tid )) {
        KMP_START_EXPLICIT_TIMER(OMP_master);
        status = 1;
    }

#if OMPT_SUPPORT && OMPT_TRACE
    if (status) {
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
            kmp_info_t *this_thr = __kmp_threads[ global_tid ];
            kmp_team_t *team = this_thr -> th.th_team;

            int tid = __kmp_tid_from_gtid( global_tid );
            ompt_callbacks.ompt_callback(ompt_event_master_begin)(
                team->t.ompt_team_info.parallel_id,
                team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
        }
    }
#endif

    if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
#else
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL );
#endif
    }

    return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the thread
that executes the <tt>master</tt> region.
*/
void
__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
{
    KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );

    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
    KMP_STOP_EXPLICIT_TIMER(OMP_master);

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team = this_thr -> th.th_team;
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_master_end)) {
        int tid = __kmp_tid_from_gtid( global_tid );
        ompt_callbacks.ompt_callback(ompt_event_master_end)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif

    if ( __kmp_env_consistency_check ) {
        if( global_tid < 0 )
            KMP_WARNING( ThreadIdentInvalid );

        if( KMP_MASTER_GTID( global_tid ))
            __kmp_pop_sync( global_tid, ct_master, loc );
    }
}

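/*
 * Editor's sketch of the master construct lowering (illustrative):
 *
 *   // #pragma omp master
 *   if (__kmpc_master(&loc, gtid)) {
 *       ...body executed by the master thread only...
 *       __kmpc_end_master(&loc, gtid);
 *   }
 *   // no implied barrier; the compiler emits __kmpc_barrier() separately if one is needed
 */
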
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

#if USE_ITT_BUILD
    __kmp_itt_ordered_prep( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        /* OMPT state update */
        th->th.ompt_thread_info.wait_id = (uint64_t) loc;
        th->th.ompt_thread_info.state = ompt_state_wait_ordered;

        /* OMPT event callback */
        if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

    if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
        (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_deo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        /* OMPT state update */
        th->th.ompt_thread_info.state = ompt_state_work_parallel;
        th->th.ompt_thread_info.wait_id = 0;

        /* OMPT event callback */
        if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

#if USE_ITT_BUILD
    __kmp_itt_ordered_start( gtid );
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;

    KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );

#if USE_ITT_BUILD
    __kmp_itt_ordered_end( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

    if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
        (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_dxo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
            th->th.ompt_thread_info.wait_id);
    }
#endif
}

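/*
 * Editor's sketch of how an ordered block inside a loop iteration is bracketed
 * by the two calls above (illustrative; the surrounding dynamic dispatch calls
 * live elsewhere in the runtime):
 *
 *   __kmpc_ordered(&loc, gtid);
 *   ...body of "#pragma omp ordered", executed in iteration order...
 *   __kmpc_end_ordered(&loc, gtid);
 */
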
#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_indirect_lock_t *
__kmp_get_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_dyna_lockseq_t seq)
{
    // Code from __kmp_get_critical_section_ptr
    // This function returns an indirect lock object instead of a user lock.
    kmp_indirect_lock_t **lck, *ret;
    lck = (kmp_indirect_lock_t **)crit;
    ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
    if (ret == NULL) {
        void *idx;
        kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
        kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
        ret = ilk;
        KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
        KMP_SET_I_LOCK_LOCATION(ilk, loc);
        KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
        KA_TRACE(20, ("__kmp_get_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
        __kmp_itt_critical_creating(ilk->lock, loc);
#endif
        int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
        if (status == 0) {
#if USE_ITT_BUILD
            __kmp_itt_critical_destroyed(ilk->lock);
#endif
            // Postponing destroy, to avoid costly dispatch here.
            //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
            ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
            KMP_DEBUG_ASSERT(ret != NULL);
        }
    }
    return ret;
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    if (l->lk.poll != KMP_LOCK_FREE(tas) || \
        ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
        kmp_uint32 spins; \
        KMP_FSYNC_PREPARE(l); \
        KMP_INIT_YIELD(spins); \
        if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
            KMP_YIELD(TRUE); \
        } else { \
            KMP_YIELD_SPIN(spins); \
        } \
        while (l->lk.poll != KMP_LOCK_FREE(tas) || \
               ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
            if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
                KMP_YIELD(TRUE); \
            } else { \
                KMP_YIELD_SPIN(spins); \
            } \
        } \
    } \
    KMP_FSYNC_ACQUIRED(l); \
}

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    rc = l->lk.poll == KMP_LOCK_FREE(tas) && \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
}

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) { \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \
    KMP_MB(); \
}

#if KMP_HAS_FUTEX

# include <unistd.h>
# include <sys/syscall.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE 1
# endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid+1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                                   KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
        if (!cond) { \
            if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
                continue; \
            } \
            poll_val |= KMP_LOCK_BUSY(1, futex); \
        } \
        kmp_int32 rc; \
        if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
            continue; \
        } \
        gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
}

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1)) { \
        KMP_FSYNC_ACQUIRED(ftx); \
        rc = TRUE; \
    } else { \
        rc = FALSE; \
    } \
}

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
        syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
}

#endif // KMP_HAS_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p
__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
{
    kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

    //
    // Because of the double-check, the following load
    // doesn't need to be volatile.
    //
    kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );

    if ( lck == NULL ) {
        void * idx;

        // Allocate & initialize the lock.
        // Remember allocated locks in table in order to free them in __kmp_cleanup()
        lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
        __kmp_init_user_lock_with_checks( lck );
        __kmp_set_user_lock_location( lck, loc );
#if USE_ITT_BUILD
        __kmp_itt_critical_creating( lck );
            // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
            // lock. It is the only place where we can guarantee it. There are chances the lock will be
            // destroyed with no usage, but it is not a problem, because this is not a real event seen
            // by the user but rather setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

        //
        // Use a cmpxchg instruction to slam the start of the critical
        // section with the lock pointer. If another thread beat us
        // to it, deallocate the lock, and use the lock that the other
        // thread allocated.
        //
        int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );

        if ( status == 0 ) {
            // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
            __kmp_itt_critical_destroyed( lck );
                // Let ITT know the lock is destroyed and the same memory location may be reused for
                // another purpose.
#endif /* USE_ITT_BUILD */
            __kmp_destroy_user_lock_with_checks( lck );
            __kmp_user_lock_free( &idx, gtid, lck );
            lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
            KMP_DEBUG_ASSERT( lck != NULL );
        }
    }
    return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void
__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
    KMP_COUNT_BLOCK(OMP_CRITICAL);

    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

#if KMP_USE_DYNAMIC_LOCK
    // Assumption: all direct locks fit in OMP_CRITICAL_SIZE.
    // The global sequence __kmp_user_lock_seq is used unless compiler pushes a value.
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        // The thread that reaches here first needs to tag the lock word.
        if (*((kmp_dyna_lock_t *)lck) == 0) {
            KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
        }
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
# if KMP_USE_FAST_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_FAST_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
        KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    //TODO: add THR_OVHD_STATE

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    /* since the critical directive binds to all threads, not just
     * the current team we have to check this even if we are in a
     * serialized team */
    /* also, even if we are the uber thread, we still have to conduct the lock,
     * as we have to contend with sibling threads */

#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_acquire_user_lock_with_checks( lck, global_tid );

#endif // KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
    __kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */

    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
} // __kmpc_critical

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void
__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
{
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));

#if KMP_USE_DYNAMIC_LOCK
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        KMP_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
# if KMP_USE_FAST_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_RELEASE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_FAST_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        KMP_ASSERT(ilk != NULL);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
        KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
    }

    KMP_ASSERT(lck != NULL);

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

#if USE_ITT_BUILD
    __kmp_itt_critical_releasing( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_release_user_lock_with_checks( lck, global_tid );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
        ompt_callbacks.ompt_callback(ompt_event_release_critical)(
            (uint64_t) lck);
    }
#endif

#endif // KMP_USE_DYNAMIC_LOCK

    KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
}

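/*
 * Editor's sketch of a critical construct lowering (illustrative): the
 * compiler emits one zero-initialized kmp_critical_name per (named) critical
 * section and passes its address to both calls, so every thread contends on
 * the same underlying lock.
 *
 *   static kmp_critical_name crit_foo;   // e.g. for "#pragma omp critical(foo)"
 *
 *   __kmpc_critical(&loc, gtid, &crit_foo);
 *   ...protected code...
 *   __kmpc_end_critical(&loc, gtid, &crit_foo);
 */
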
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside this function.
*/
kmp_int32
__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
{
    int status;

    KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check )
        __kmp_check_barrier( global_tid, ct_barrier, loc );

#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );

    return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void
__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
{
    KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));

    __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the other threads do not wait
for the master's code to complete.
*/
kmp_int32
__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
{
    kmp_int32 ret;

    KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {
        if ( loc == 0 ) {
            KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
        }
        __kmp_check_barrier( global_tid, ct_barrier, loc );
    }

#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    ret = __kmpc_master (loc, global_tid);

    if ( __kmp_env_consistency_check ) {
        /* there's no __kmpc_end_master called; so the (stats) */
        /* actions of __kmpc_end_master are done here          */

        if ( global_tid < 0 ) {
            KMP_WARNING( ThreadIdentInvalid );
        }
        if (ret) {
            /* only one thread should do the pop since only */
            /* one did the push (see __kmpc_master())       */

            __kmp_pop_sync( global_tid, ct_master, loc );
        }
    }

    return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler should
introduce an explicit barrier if it is required.
*/

kmp_int32
__kmpc_single(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_SINGLE);
    kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
    if(rc == TRUE) {
        KMP_START_EXPLICIT_TIMER(OMP_single);
    }

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team = this_thr -> th.th_team;
    int tid = __kmp_tid_from_gtid( global_tid );

    if (ompt_enabled) {
        if (rc) {
            if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
                    team->t.ompt_team_info.parallel_id,
                    team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
                    team->t.ompt_team_info.microtask);
            }
        } else {
            if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
                    team->t.ompt_team_info.parallel_id,
                    team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
            }
            this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
        }
    }
#endif

    return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void
__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
{
    __kmp_exit_single( global_tid );
    KMP_STOP_EXPLICIT_TIMER(OMP_single);

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team = this_thr -> th.th_team;
    int tid = __kmp_tid_from_gtid( global_tid );

    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
        ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif
}

1475/*!
1476@ingroup WORK_SHARING
1477@param loc Source location
1478@param global_tid Global thread id
1479
1480Mark the end of a statically scheduled loop.
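
A hedged sketch of the surrounding calls for one statically scheduled loop,
assuming the 32-bit init entry point __kmpc_for_static_init_4 and illustrative
variable names:
@code
kmp_int32 lower = 0, upper = N - 1, stride = 1, last = 0;
__kmpc_for_static_init_4( &loc, gtid, kmp_sch_static, &last,
                          &lower, &upper, &stride, 1, 0 );
for ( kmp_int32 i = lower; i <= upper; ++i ) {
    /* loop body for this thread's chunk */
}
__kmpc_for_static_fini( &loc, gtid );
@endcode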
1481*/
1482void
1483__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1484{
1485 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1486
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001487#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001488 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001489 ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
Jonathan Peytonf0344bb2015-10-09 17:42:52 +00001490 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1491 kmp_team_t *team = this_thr -> th.th_team;
1492 int tid = __kmp_tid_from_gtid( global_tid );
1493
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001494 ompt_callbacks.ompt_callback(ompt_event_loop_end)(
1495 team->t.ompt_team_info.parallel_id,
1496 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1497 }
1498#endif
1499
Jim Cownie5e8470a2013-09-27 10:38:44 +00001500 if ( __kmp_env_consistency_check )
1501 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1502}
1503
1504/*
1505 * User routines which take C-style arguments (call by value)
1506 * different from the Fortran equivalent routines
1507 */
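/*
 * Illustrative only (assumed usage): a caller that wants the call-by-value form
 * can invoke these entry points directly instead of the Fortran-style wrappers,
 * e.g.
 *
 *     ompc_set_num_threads( 4 );
 *     ompc_set_schedule( omp_sched_dynamic, 2 );
 *
 * omp_sched_dynamic is the omp_sched_t value declared in omp.h.
 */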
1508
1509void
1510ompc_set_num_threads( int arg )
1511{
1512// !!!!! TODO: check the per-task binding
1513 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1514}
1515
1516void
1517ompc_set_dynamic( int flag )
1518{
1519 kmp_info_t *thread;
1520
1521 /* For the thread-private implementation of the internal controls */
1522 thread = __kmp_entry_thread();
1523
1524 __kmp_save_internal_controls( thread );
1525
1526 set__dynamic( thread, flag ? TRUE : FALSE );
1527}
1528
1529void
1530ompc_set_nested( int flag )
1531{
1532 kmp_info_t *thread;
1533
1534 /* For the thread-private internal controls implementation */
1535 thread = __kmp_entry_thread();
1536
1537 __kmp_save_internal_controls( thread );
1538
1539 set__nested( thread, flag ? TRUE : FALSE );
1540}
1541
Jim Cownie5e8470a2013-09-27 10:38:44 +00001542void
1543ompc_set_max_active_levels( int max_active_levels )
1544{
1545 /* TO DO */
1546 /* we want per-task implementation of this internal control */
1547
1548 /* For the per-thread internal controls implementation */
1549 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1550}
1551
1552void
1553ompc_set_schedule( omp_sched_t kind, int modifier )
1554{
1555// !!!!! TODO: check the per-task binding
1556 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1557}
1558
1559int
1560ompc_get_ancestor_thread_num( int level )
1561{
1562 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1563}
1564
1565int
1566ompc_get_team_size( int level )
1567{
1568 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1569}
1570
Jim Cownie5e8470a2013-09-27 10:38:44 +00001571void
1572kmpc_set_stacksize( int arg )
1573{
1574 // __kmp_aux_set_stacksize initializes the library if needed
1575 __kmp_aux_set_stacksize( arg );
1576}
1577
1578void
1579kmpc_set_stacksize_s( size_t arg )
1580{
1581 // __kmp_aux_set_stacksize initializes the library if needed
1582 __kmp_aux_set_stacksize( arg );
1583}
1584
1585void
1586kmpc_set_blocktime( int arg )
1587{
1588 int gtid, tid;
1589 kmp_info_t *thread;
1590
1591 gtid = __kmp_entry_gtid();
1592 tid = __kmp_tid_from_gtid(gtid);
1593 thread = __kmp_thread_from_gtid(gtid);
1594
1595 __kmp_aux_set_blocktime( arg, thread, tid );
1596}
1597
1598void
1599kmpc_set_library( int arg )
1600{
1601 // __kmp_user_set_library initializes the library if needed
1602 __kmp_user_set_library( (enum library_type)arg );
1603}
1604
1605void
1606kmpc_set_defaults( char const * str )
1607{
1608 // __kmp_aux_set_defaults initializes the library if needed
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001609 __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001610}
1611
Jim Cownie5e8470a2013-09-27 10:38:44 +00001612int
1613kmpc_set_affinity_mask_proc( int proc, void **mask )
1614{
Alp Toker98758b02014-03-02 04:12:06 +00001615#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001616 return -1;
1617#else
1618 if ( ! TCR_4(__kmp_init_middle) ) {
1619 __kmp_middle_initialize();
1620 }
1621 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1622#endif
1623}
1624
1625int
1626kmpc_unset_affinity_mask_proc( int proc, void **mask )
1627{
Alp Toker98758b02014-03-02 04:12:06 +00001628#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001629 return -1;
1630#else
1631 if ( ! TCR_4(__kmp_init_middle) ) {
1632 __kmp_middle_initialize();
1633 }
1634 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1635#endif
1636}
1637
1638int
1639kmpc_get_affinity_mask_proc( int proc, void **mask )
1640{
Alp Toker98758b02014-03-02 04:12:06 +00001641#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001642 return -1;
1643#else
1644 if ( ! TCR_4(__kmp_init_middle) ) {
1645 __kmp_middle_initialize();
1646 }
1647 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1648#endif
1649}
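
/*
 * Illustrative only: a sketch of driving the affinity-mask entry points above.
 * kmp_affinity_mask_t, kmp_create_affinity_mask() and kmp_destroy_affinity_mask()
 * are assumed to be available from omp.h; the error handling is a guess based on
 * the -1 returned by the stub/unsupported branch.
 *
 *   kmp_affinity_mask_t mask;
 *   kmp_create_affinity_mask( &mask );
 *   if ( kmpc_set_affinity_mask_proc( 3, &mask ) != 0 ) {
 *       // stub build, or affinity not supported on this platform
 *   }
 *   kmp_destroy_affinity_mask( &mask );
 */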
1650
Jim Cownie5e8470a2013-09-27 10:38:44 +00001651
1652/* -------------------------------------------------------------------------- */
1653/*!
1654@ingroup THREADPRIVATE
1655@param loc source location information
1656@param gtid global thread number
1657@param cpy_size size of the cpy_data buffer
1658@param cpy_data pointer to data to be copied
1659@param cpy_func helper function to call for copying data
1660@param didit flag variable: 1=single thread; 0=not single thread
1661
1662__kmpc_copyprivate implements the interface for the private data broadcast needed for
1663the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1664All threads participating in the parallel region call this routine.
1665One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1666and all other threads should have that variable set to 0.
1667All threads pass a pointer to a data buffer (cpy_data) that they have built.
1668
1669The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1670clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1671race conditions, so the code generation for the single region should avoid generating a barrier
1672after the call to @ref __kmpc_copyprivate.
1673
1674The <tt>gtid</tt> parameter is the global thread id for the current thread.
1675The <tt>loc</tt> parameter is a pointer to source location information.
1676
1677Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1678to a team-private location, then the other threads will each call the function pointed to by
1679the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer.
1680
1681The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1682and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1683the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1684to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1685
1686The interface to cpy_func is as follows:
1687@code
1688void cpy_func( void *destination, void *source )
1689@endcode
1690where void *destination is the cpy_data pointer for the thread being copied to
1691and void *source is the cpy_data pointer for the thread being copied from.
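
As an illustration only (helper and struct names are assumed, not part of this API),
a cpy_func that broadcasts a single integer through a pointer held in cpy_data could be:
@code
typedef struct { int *addr; } my_cpy_desc_t;      /* per-thread descriptor */

void my_cpy_func( void *destination, void *source )
{
    my_cpy_desc_t *dst = (my_cpy_desc_t *)destination;
    my_cpy_desc_t *src = (my_cpy_desc_t *)source;
    *dst->addr = *src->addr;   /* copy from the single thread's variable */
}

/* each thread: desc.addr = &its_variable; then
   __kmpc_copyprivate( &loc, gtid, sizeof(desc), &desc, my_cpy_func, didit ); */
@endcode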
1692*/
1693void
1694__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1695{
1696 void **data_ptr;
1697
1698 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1699
1700 KMP_MB();
1701
1702 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1703
1704 if ( __kmp_env_consistency_check ) {
1705 if ( loc == 0 ) {
1706 KMP_WARNING( ConstructIdentInvalid );
1707 }
1708 }
1709
1710 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1711
1712 if (didit) *data_ptr = cpy_data;
1713
1714 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001715#if USE_ITT_NOTIFY
1716 __kmp_threads[gtid]->th.th_ident = loc;
1717#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001718 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1719
1720 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1721
1722 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1723 /* Nesting checks are already handled by the single construct checks */
1724
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001725#if USE_ITT_NOTIFY
1726 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1727#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001728 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1729}
1730
1731/* -------------------------------------------------------------------------- */
1732
1733#define INIT_LOCK __kmp_init_user_lock_with_checks
1734#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1735#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1736#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1737#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1738#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1739#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1740#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1741#define TEST_LOCK __kmp_test_user_lock_with_checks
1742#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1743#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1744#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1745
1746
1747/*
1748 * TODO: Make check abort messages use location info & pass it
1749 * into with_checks routines
1750 */
1751
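/*
 * Illustrative only: one plausible lowering of the user-level OpenMP lock API
 * onto the entry points below. Whether a particular compiler emits these
 * __kmpc_* calls or calls the omp_* library routines directly is an assumption
 * here, not something this file guarantees.
 *
 *   omp_lock_t l;
 *   __kmpc_init_lock( &loc, gtid, (void **)&l );     // omp_init_lock( &l )
 *   __kmpc_set_lock( &loc, gtid, (void **)&l );      // omp_set_lock( &l )
 *   ...critical work...
 *   __kmpc_unset_lock( &loc, gtid, (void **)&l );    // omp_unset_lock( &l )
 *   __kmpc_destroy_lock( &loc, gtid, (void **)&l );  // omp_destroy_lock( &l )
 */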
1752/* initialize the lock */
1753void
1754__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001755#if KMP_USE_DYNAMIC_LOCK
1756 KMP_DEBUG_ASSERT(__kmp_init_serial);
1757 if (__kmp_env_consistency_check && user_lock == NULL) {
1758 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1759 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001760 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1761 KMP_INIT_D_LOCK(user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001762# if USE_ITT_BUILD
1763 __kmp_itt_lock_creating((kmp_user_lock_p)user_lock, NULL);
1764# endif
1765 } else {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001766 KMP_INIT_I_LOCK(user_lock, __kmp_user_lock_seq);
1767 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
1768 KMP_SET_I_LOCK_LOCATION(ilk, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001769# if USE_ITT_BUILD
1770 __kmp_itt_lock_creating(ilk->lock, loc);
1771# endif
1772 }
1773
1774#else // KMP_USE_DYNAMIC_LOCK
1775
Jim Cownie5e8470a2013-09-27 10:38:44 +00001776 static char const * const func = "omp_init_lock";
1777 kmp_user_lock_p lck;
1778 KMP_DEBUG_ASSERT( __kmp_init_serial );
1779
1780 if ( __kmp_env_consistency_check ) {
1781 if ( user_lock == NULL ) {
1782 KMP_FATAL( LockIsUninitialized, func );
1783 }
1784 }
1785
1786 KMP_CHECK_USER_LOCK_INIT();
1787
1788 if ( ( __kmp_user_lock_kind == lk_tas )
1789 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1790 lck = (kmp_user_lock_p)user_lock;
1791 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001792#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001793 else if ( ( __kmp_user_lock_kind == lk_futex )
1794 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1795 lck = (kmp_user_lock_p)user_lock;
1796 }
1797#endif
1798 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001799 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001800 }
1801 INIT_LOCK( lck );
1802 __kmp_set_user_lock_location( lck, loc );
1803
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001804#if OMPT_SUPPORT && OMPT_TRACE
1805 if (ompt_enabled &&
1806 ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
1807 ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
1808 }
1809#endif
1810
Jim Cownie5e8470a2013-09-27 10:38:44 +00001811#if USE_ITT_BUILD
1812 __kmp_itt_lock_creating( lck );
1813#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001814
1815#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001816} // __kmpc_init_lock
1817
1818/* initialize the lock */
1819void
1820__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001821#if KMP_USE_DYNAMIC_LOCK
1822
1823 KMP_DEBUG_ASSERT(__kmp_init_serial);
1824 if (__kmp_env_consistency_check && user_lock == NULL) {
1825 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1826 }
1827 // Invoke init function after converting to nested version.
1828 kmp_dyna_lockseq_t nested_seq;
1829 switch (__kmp_user_lock_seq) {
1830 case lockseq_tas: nested_seq = lockseq_nested_tas; break;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001831#if KMP_HAS_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001832 case lockseq_futex: nested_seq = lockseq_nested_futex; break;
1833#endif
1834 case lockseq_ticket: nested_seq = lockseq_nested_ticket; break;
1835 case lockseq_queuing: nested_seq = lockseq_nested_queuing; break;
1836 case lockseq_drdpa: nested_seq = lockseq_nested_drdpa; break;
1837 default: nested_seq = lockseq_nested_queuing; break;
1838 // Use nested queuing lock for lock kinds without "nested" implementation.
1839 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001840 KMP_INIT_I_LOCK(user_lock, nested_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001841 // All nested locks are indirect locks.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001842 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
1843 KMP_SET_I_LOCK_LOCATION(ilk, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001844# if USE_ITT_BUILD
1845 __kmp_itt_lock_creating(ilk->lock, loc);
1846# endif
1847
1848#else // KMP_USE_DYNAMIC_LOCK
1849
Jim Cownie5e8470a2013-09-27 10:38:44 +00001850 static char const * const func = "omp_init_nest_lock";
1851 kmp_user_lock_p lck;
1852 KMP_DEBUG_ASSERT( __kmp_init_serial );
1853
1854 if ( __kmp_env_consistency_check ) {
1855 if ( user_lock == NULL ) {
1856 KMP_FATAL( LockIsUninitialized, func );
1857 }
1858 }
1859
1860 KMP_CHECK_USER_LOCK_INIT();
1861
1862 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1863 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1864 lck = (kmp_user_lock_p)user_lock;
1865 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001866#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001867 else if ( ( __kmp_user_lock_kind == lk_futex )
1868 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1869 <= OMP_NEST_LOCK_T_SIZE ) ) {
1870 lck = (kmp_user_lock_p)user_lock;
1871 }
1872#endif
1873 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001874 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001875 }
1876
1877 INIT_NESTED_LOCK( lck );
1878 __kmp_set_user_lock_location( lck, loc );
1879
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001880#if OMPT_SUPPORT && OMPT_TRACE
1881 if (ompt_enabled &&
1882 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
1883 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
1884 }
1885#endif
1886
Jim Cownie5e8470a2013-09-27 10:38:44 +00001887#if USE_ITT_BUILD
1888 __kmp_itt_lock_creating( lck );
1889#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001890
1891#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001892} // __kmpc_init_nest_lock
1893
1894void
1895__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001896#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001897
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001898# if USE_ITT_BUILD
1899 kmp_user_lock_p lck;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001900 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
1901 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001902 } else {
1903 lck = (kmp_user_lock_p)user_lock;
1904 }
1905 __kmp_itt_lock_destroyed(lck);
1906# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001907 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001908#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001909 kmp_user_lock_p lck;
1910
1911 if ( ( __kmp_user_lock_kind == lk_tas )
1912 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1913 lck = (kmp_user_lock_p)user_lock;
1914 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001915#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001916 else if ( ( __kmp_user_lock_kind == lk_futex )
1917 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1918 lck = (kmp_user_lock_p)user_lock;
1919 }
1920#endif
1921 else {
1922 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
1923 }
1924
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001925#if OMPT_SUPPORT && OMPT_TRACE
1926 if (ompt_enabled &&
1927 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
1928 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
1929 }
1930#endif
1931
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932#if USE_ITT_BUILD
1933 __kmp_itt_lock_destroyed( lck );
1934#endif /* USE_ITT_BUILD */
1935 DESTROY_LOCK( lck );
1936
1937 if ( ( __kmp_user_lock_kind == lk_tas )
1938 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1939 ;
1940 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001941#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001942 else if ( ( __kmp_user_lock_kind == lk_futex )
1943 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1944 ;
1945 }
1946#endif
1947 else {
1948 __kmp_user_lock_free( user_lock, gtid, lck );
1949 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001950#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001951} // __kmpc_destroy_lock
1952
1953/* destroy the lock */
1954void
1955__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001956#if KMP_USE_DYNAMIC_LOCK
1957
1958# if USE_ITT_BUILD
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001959 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001960 __kmp_itt_lock_destroyed(ilk->lock);
1961# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001962 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001963
1964#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965
1966 kmp_user_lock_p lck;
1967
1968 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1969 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1970 lck = (kmp_user_lock_p)user_lock;
1971 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001972#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001973 else if ( ( __kmp_user_lock_kind == lk_futex )
1974 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1975 <= OMP_NEST_LOCK_T_SIZE ) ) {
1976 lck = (kmp_user_lock_p)user_lock;
1977 }
1978#endif
1979 else {
1980 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
1981 }
1982
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001983#if OMPT_SUPPORT && OMPT_TRACE
1984 if (ompt_enabled &&
1985 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
1986 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
1987 }
1988#endif
1989
Jim Cownie5e8470a2013-09-27 10:38:44 +00001990#if USE_ITT_BUILD
1991 __kmp_itt_lock_destroyed( lck );
1992#endif /* USE_ITT_BUILD */
1993
1994 DESTROY_NESTED_LOCK( lck );
1995
1996 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1997 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1998 ;
1999 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002000#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002001 else if ( ( __kmp_user_lock_kind == lk_futex )
2002 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2003 <= OMP_NEST_LOCK_T_SIZE ) ) {
2004 ;
2005 }
2006#endif
2007 else {
2008 __kmp_user_lock_free( user_lock, gtid, lck );
2009 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002010#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011} // __kmpc_destroy_nest_lock
2012
2013void
2014__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002015 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002016#if KMP_USE_DYNAMIC_LOCK
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002017 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002018# if USE_ITT_BUILD
2019 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
2020# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002021# if KMP_USE_FAST_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002022 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002023 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002024 } else
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002025# elif KMP_USE_FAST_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002026 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002027 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002028 } else
2029# endif
2030 {
2031 __kmp_direct_set_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2032 }
2033# if USE_ITT_BUILD
2034 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2035# endif
2036
2037#else // KMP_USE_DYNAMIC_LOCK
2038
Jim Cownie5e8470a2013-09-27 10:38:44 +00002039 kmp_user_lock_p lck;
2040
2041 if ( ( __kmp_user_lock_kind == lk_tas )
2042 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2043 lck = (kmp_user_lock_p)user_lock;
2044 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002045#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046 else if ( ( __kmp_user_lock_kind == lk_futex )
2047 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2048 lck = (kmp_user_lock_p)user_lock;
2049 }
2050#endif
2051 else {
2052 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
2053 }
2054
2055#if USE_ITT_BUILD
2056 __kmp_itt_lock_acquiring( lck );
2057#endif /* USE_ITT_BUILD */
2058
2059 ACQUIRE_LOCK( lck, gtid );
2060
2061#if USE_ITT_BUILD
2062 __kmp_itt_lock_acquired( lck );
2063#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002064
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002065#if OMPT_SUPPORT && OMPT_TRACE
2066 if (ompt_enabled &&
2067 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
2068 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
2069 }
2070#endif
2071
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002072#endif // KMP_USE_DYNAMIC_LOCK
2073}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002074
2075void
2076__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002077#if KMP_USE_DYNAMIC_LOCK
2078
2079# if USE_ITT_BUILD
2080 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2081# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002082 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002083# if USE_ITT_BUILD
2084 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2085#endif
2086
2087#else // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002088 int acquire_status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002089 kmp_user_lock_p lck;
2090
2091 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2092 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2093 lck = (kmp_user_lock_p)user_lock;
2094 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002095#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002096 else if ( ( __kmp_user_lock_kind == lk_futex )
2097 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2098 <= OMP_NEST_LOCK_T_SIZE ) ) {
2099 lck = (kmp_user_lock_p)user_lock;
2100 }
2101#endif
2102 else {
2103 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2104 }
2105
2106#if USE_ITT_BUILD
2107 __kmp_itt_lock_acquiring( lck );
2108#endif /* USE_ITT_BUILD */
2109
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002110 ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002111
2112#if USE_ITT_BUILD
2113 __kmp_itt_lock_acquired( lck );
2114#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002115#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002116
2117#if OMPT_SUPPORT && OMPT_TRACE
2118 if (ompt_enabled) {
2119 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2120 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
2121 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
2122 } else {
2123 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
2124 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
2125 }
2126 }
2127#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002128}
2129
2130void
2131__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2132{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002133#if KMP_USE_DYNAMIC_LOCK
2134
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002135 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002136# if USE_ITT_BUILD
2137 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2138# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002139# if KMP_USE_FAST_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002140 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002141 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002142 } else
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002143# elif KMP_USE_FAST_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002144 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002145 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002146 } else
2147# endif
2148 {
2149 __kmp_direct_unset_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2150 }
2151
2152#else // KMP_USE_DYNAMIC_LOCK
2153
Jim Cownie5e8470a2013-09-27 10:38:44 +00002154 kmp_user_lock_p lck;
2155
2156 /* Can't use serial interval since not block structured */
2157 /* release the lock */
2158
2159 if ( ( __kmp_user_lock_kind == lk_tas )
2160 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002161#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002162 // "fast" path implemented to fix customer performance issue
2163#if USE_ITT_BUILD
2164 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2165#endif /* USE_ITT_BUILD */
2166 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2167 KMP_MB();
2168 return;
2169#else
2170 lck = (kmp_user_lock_p)user_lock;
2171#endif
2172 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002173#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002174 else if ( ( __kmp_user_lock_kind == lk_futex )
2175 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2176 lck = (kmp_user_lock_p)user_lock;
2177 }
2178#endif
2179 else {
2180 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2181 }
2182
2183#if USE_ITT_BUILD
2184 __kmp_itt_lock_releasing( lck );
2185#endif /* USE_ITT_BUILD */
2186
2187 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002188
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002189#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002190 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002191 ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2192 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2193 }
2194#endif
2195
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002196#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002197}
2198
2199/* release the lock */
2200void
2201__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2202{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002203#if KMP_USE_DYNAMIC_LOCK
2204
2205# if USE_ITT_BUILD
2206 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2207# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002208 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002209
2210#else // KMP_USE_DYNAMIC_LOCK
2211
Jim Cownie5e8470a2013-09-27 10:38:44 +00002212 kmp_user_lock_p lck;
2213
2214 /* Can't use serial interval since not block structured */
2215
2216 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2217 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002218#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002219 // "fast" path implemented to fix customer performance issue
2220 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2221#if USE_ITT_BUILD
2222 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2223#endif /* USE_ITT_BUILD */
2224 if ( --(tl->lk.depth_locked) == 0 ) {
2225 TCW_4(tl->lk.poll, 0);
2226 }
2227 KMP_MB();
2228 return;
2229#else
2230 lck = (kmp_user_lock_p)user_lock;
2231#endif
2232 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002233#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002234 else if ( ( __kmp_user_lock_kind == lk_futex )
2235 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2236 <= OMP_NEST_LOCK_T_SIZE ) ) {
2237 lck = (kmp_user_lock_p)user_lock;
2238 }
2239#endif
2240 else {
2241 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2242 }
2243
2244#if USE_ITT_BUILD
2245 __kmp_itt_lock_releasing( lck );
2246#endif /* USE_ITT_BUILD */
2247
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002248 int release_status;
2249 release_status = RELEASE_NESTED_LOCK( lck, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002250#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002251 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002252 if (release_status == KMP_LOCK_RELEASED) {
2253 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2254 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2255 (uint64_t) lck);
2256 }
2257 } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2258 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2259 (uint64_t) lck);
2260 }
2261 }
2262#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002263
2264#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002265}
2266
2267/* try to acquire the lock */
2268int
2269__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2270{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002271 KMP_COUNT_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002272
2273#if KMP_USE_DYNAMIC_LOCK
2274 int rc;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002275 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002276# if USE_ITT_BUILD
Jonathan Peyton81f9cd12015-05-22 22:37:22 +00002277 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002278# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002279# if KMP_USE_FAST_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002280 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002281 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002282 } else
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002283# elif KMP_USE_FAST_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002284 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002285 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002286 } else
2287# endif
2288 {
2289 rc = __kmp_direct_test_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2290 }
2291 if (rc) {
2292# if USE_ITT_BUILD
2293 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2294# endif
2295 return FTN_TRUE;
2296 } else {
2297# if USE_ITT_BUILD
2298 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2299# endif
2300 return FTN_FALSE;
2301 }
2302
2303#else // KMP_USE_DYNAMIC_LOCK
2304
Jim Cownie5e8470a2013-09-27 10:38:44 +00002305 kmp_user_lock_p lck;
2306 int rc;
2307
2308 if ( ( __kmp_user_lock_kind == lk_tas )
2309 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2310 lck = (kmp_user_lock_p)user_lock;
2311 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002312#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002313 else if ( ( __kmp_user_lock_kind == lk_futex )
2314 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2315 lck = (kmp_user_lock_p)user_lock;
2316 }
2317#endif
2318 else {
2319 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2320 }
2321
2322#if USE_ITT_BUILD
2323 __kmp_itt_lock_acquiring( lck );
2324#endif /* USE_ITT_BUILD */
2325
2326 rc = TEST_LOCK( lck, gtid );
2327#if USE_ITT_BUILD
2328 if ( rc ) {
2329 __kmp_itt_lock_acquired( lck );
2330 } else {
2331 __kmp_itt_lock_cancelled( lck );
2332 }
2333#endif /* USE_ITT_BUILD */
2334 return ( rc ? FTN_TRUE : FTN_FALSE );
2335
2336 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002337
2338#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002339}
2340
2341/* try to acquire the lock */
2342int
2343__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2344{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002345#if KMP_USE_DYNAMIC_LOCK
2346 int rc;
2347# if USE_ITT_BUILD
2348 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2349# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002350 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002351# if USE_ITT_BUILD
2352 if (rc) {
2353 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2354 } else {
2355 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2356 }
2357# endif
2358 return rc;
2359
2360#else // KMP_USE_DYNAMIC_LOCK
2361
Jim Cownie5e8470a2013-09-27 10:38:44 +00002362 kmp_user_lock_p lck;
2363 int rc;
2364
2365 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2366 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2367 lck = (kmp_user_lock_p)user_lock;
2368 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002369#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002370 else if ( ( __kmp_user_lock_kind == lk_futex )
2371 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2372 <= OMP_NEST_LOCK_T_SIZE ) ) {
2373 lck = (kmp_user_lock_p)user_lock;
2374 }
2375#endif
2376 else {
2377 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2378 }
2379
2380#if USE_ITT_BUILD
2381 __kmp_itt_lock_acquiring( lck );
2382#endif /* USE_ITT_BUILD */
2383
2384 rc = TEST_NESTED_LOCK( lck, gtid );
2385#if USE_ITT_BUILD
2386 if ( rc ) {
2387 __kmp_itt_lock_acquired( lck );
2388 } else {
2389 __kmp_itt_lock_cancelled( lck );
2390 }
2391#endif /* USE_ITT_BUILD */
2392 return rc;
2393
2394 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002395
2396#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002397}
2398
2399
2400/*--------------------------------------------------------------------------------------------------------------------*/
2401
2402/*
2403 * Interface to fast scalable reduce methods routines
2404 */
2405
2406// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions;
2407// another solution: to re-determine the method one more time in __kmpc_end_reduce* functions (new prototype required then)
2408// AT: which solution is better?
2409#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2410 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2411
2412#define __KMP_GET_REDUCTION_METHOD(gtid) \
2413 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2414
2415// description of the packed_reduction_method variable: look at the macros in kmp.h
2416
2417
2418// used in a critical section reduce block
2419static __forceinline void
2420__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2421
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002422 // this lock was visible to a customer and to the threading profile tool as a serial overhead span
Jim Cownie5e8470a2013-09-27 10:38:44 +00002423 // (although it's used for an internal purpose only)
2424 // why was it visible in previous implementation?
2425 // should we keep it visible in new reduce block?
2426 kmp_user_lock_p lck;
2427
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002428#if KMP_USE_DYNAMIC_LOCK
2429
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002430 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002431 lck = (kmp_user_lock_p)crit;
2432 if (*((kmp_dyna_lock_t *)lck) == 0) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002433 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002434 }
2435 KMP_DEBUG_ASSERT(lck != NULL);
2436 if (__kmp_env_consistency_check) {
2437 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2438 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002439 KMP_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002440 } else {
2441 kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
2442 KMP_DEBUG_ASSERT(ilk != NULL);
2443 if (__kmp_env_consistency_check) {
2444 __kmp_push_sync(global_tid, ct_critical, loc, ilk->lock, __kmp_user_lock_seq);
2445 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002446 KMP_I_LOCK_FUNC(ilk, set)(ilk->lock, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002447 }
2448
2449#else // KMP_USE_DYNAMIC_LOCK
2450
Jim Cownie5e8470a2013-09-27 10:38:44 +00002451 // We know that the fast reduction code is only emitted by Intel compilers
2452 // with 32 byte critical sections. If there isn't enough space, then we
2453 // have to use a pointer.
2454 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2455 lck = (kmp_user_lock_p)crit;
2456 }
2457 else {
2458 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2459 }
2460 KMP_DEBUG_ASSERT( lck != NULL );
2461
2462 if ( __kmp_env_consistency_check )
2463 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2464
2465 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002466
2467#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002468}
2469
2470// used in a critical section reduce block
2471static __forceinline void
2472__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2473
2474 kmp_user_lock_p lck;
2475
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002476#if KMP_USE_DYNAMIC_LOCK
2477
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002478 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002479 lck = (kmp_user_lock_p)crit;
2480 if (__kmp_env_consistency_check)
2481 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002482 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002483 } else {
2484 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2485 if (__kmp_env_consistency_check)
2486 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002487 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002488 }
2489
2490#else // KMP_USE_DYNAMIC_LOCK
2491
Jim Cownie5e8470a2013-09-27 10:38:44 +00002492 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2493 // sections. If there isn't enough space, then we have to use a pointer.
2494 if ( __kmp_base_user_lock_size > 32 ) {
2495 lck = *( (kmp_user_lock_p *) crit );
2496 KMP_ASSERT( lck != NULL );
2497 } else {
2498 lck = (kmp_user_lock_p) crit;
2499 }
2500
2501 if ( __kmp_env_consistency_check )
2502 __kmp_pop_sync( global_tid, ct_critical, loc );
2503
2504 __kmp_release_user_lock_with_checks( lck, global_tid );
2505
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002506#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002507} // __kmp_end_critical_section_reduce_block
2508
2509
2510/* 2.a.i. Reduce Block without a terminating barrier */
2511/*!
2512@ingroup SYNCHRONIZATION
2513@param loc source location information
2514@param global_tid global thread number
2515@param num_vars number of items (variables) to be reduced
2516@param reduce_size size of data in bytes to be reduced
2517@param reduce_data pointer to data to be reduced
2518@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2519@param lck pointer to the unique lock data structure
2520@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2521
2522The nowait version is used for a reduce clause with the nowait argument.
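
A hedged sketch of the dispatch code a compiler might place around this call
(local_sum, sum, reduce_func and crit_name are assumed names for the thread's
partial result, the shared result, the combiner and the lock variable):
@code
switch ( __kmpc_reduce_nowait( &loc, gtid, 1, sizeof(local_sum),
                               &local_sum, reduce_func, &crit_name ) ) {
    case 1:   /* this thread folds its contribution in directly */
        sum += local_sum;
        __kmpc_end_reduce_nowait( &loc, gtid, &crit_name );
        break;
    case 2:   /* every thread updates the shared result atomically */
        /* #pragma omp atomic */
        sum += local_sum;
        break;
    default:  /* 0: nothing left to do; the tree reduction already combined the data */
        break;
}
@endcode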
2523*/
2524kmp_int32
2525__kmpc_reduce_nowait(
2526 ident_t *loc, kmp_int32 global_tid,
2527 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2528 kmp_critical_name *lck ) {
2529
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002530 KMP_COUNT_BLOCK(REDUCE_nowait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002531 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002532 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002533#if OMP_40_ENABLED
2534 kmp_team_t *team;
2535 kmp_info_t *th;
2536 int teams_swapped = 0, task_state;
2537#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2539
2540 // why do we need this initialization here at all?
2541 // Reduction clause cannot be used as a stand-alone directive.
2542
2543 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2544 // possible detection of false-positive race by the threadchecker ???
2545 if( ! TCR_4( __kmp_init_parallel ) )
2546 __kmp_parallel_initialize();
2547
2548 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002549#if KMP_USE_DYNAMIC_LOCK
2550 if ( __kmp_env_consistency_check )
2551 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2552#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002553 if ( __kmp_env_consistency_check )
2554 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002555#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002556
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002557#if OMP_40_ENABLED
2558 th = __kmp_thread_from_gtid(global_tid);
2559 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2560 team = th->th.th_team;
2561 if( team->t.t_level == th->th.th_teams_level ) {
2562 // this is reduction at teams construct
2563 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2564 // Let's swap teams temporarily for the reduction barrier
2565 teams_swapped = 1;
2566 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2567 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002568 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002569 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002570 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002571 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002572 }
2573 }
2574#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002575
2576 // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable
2577 // the variable should be either a construct-specific or thread-specific property, not a team specific property
2578 // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct)
2579 // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
2580 // (if both construct-specific and team-specific variables were shared, then unnecessary extra syncs would be needed)
2581 // a thread-specific variable is better regarding two issues above (next construct and extra syncs)
2582 // a thread-specific "th_local.reduction_method" variable is used currently
2583 // each thread executes 'determine' and 'set' lines (no need to execute by one thread, to avoid unnecessary extra syncs)
2584
2585 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2586 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2587
2588 if( packed_reduction_method == critical_reduce_block ) {
2589
2590 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2591 retval = 1;
2592
2593 } else if( packed_reduction_method == empty_reduce_block ) {
2594
2595 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2596 retval = 1;
2597
2598 } else if( packed_reduction_method == atomic_reduce_block ) {
2599
2600 retval = 2;
2601
2602 // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2603 // (this is not ideal, because the checking block has been closed by this 'pop'
2604 // while the atomic operation has not executed yet; it will execute slightly later, literally on the next instruction)
2605 if ( __kmp_env_consistency_check )
2606 __kmp_pop_sync( global_tid, ct_reduce, loc );
2607
2608 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2609
2610 //AT: performance issue: a real barrier here
2611 //AT: (if master goes slow, other threads are blocked here waiting for the master to come and release them)
2612 //AT: (it's not what a customer might expect when specifying the NOWAIT clause)
2613 //AT: (specifying NOWAIT won't improve performance here; it will just be confusing to a customer)
2614 //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
2615 // and be more in line with sense of NOWAIT
2616 //AT: TO DO: do epcc test and compare times
2617
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002618 // this barrier should be invisible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002619 // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002620#if USE_ITT_NOTIFY
2621 __kmp_threads[global_tid]->th.th_ident = loc;
2622#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2624 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2625
2626 // all other workers except master should do this pop here
2627 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
2628 if ( __kmp_env_consistency_check ) {
2629 if( retval == 0 ) {
2630 __kmp_pop_sync( global_tid, ct_reduce, loc );
2631 }
2632 }
2633
2634 } else {
2635
2636 // should never reach this block
2637 KMP_ASSERT( 0 ); // "unexpected method"
2638
2639 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002640#if OMP_40_ENABLED
2641 if( teams_swapped ) {
2642 // Restore thread structure
2643 th->th.th_info.ds.ds_tid = 0;
2644 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002645 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002646 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002647 th->th.th_task_state = task_state;
2648 }
2649#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002650 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2651
2652 return retval;
2653}
2654
2655/*!
2656@ingroup SYNCHRONIZATION
2657@param loc source location information
2658@param global_tid global thread id.
2659@param lck pointer to the unique lock data structure
2660
2661Finish the execution of a reduce nowait.
2662*/
2663void
2664__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2665
2666 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2667
2668 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2669
2670 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2671
2672 if( packed_reduction_method == critical_reduce_block ) {
2673
2674 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2675
2676 } else if( packed_reduction_method == empty_reduce_block ) {
2677
2678 // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2679
2680 } else if( packed_reduction_method == atomic_reduce_block ) {
2681
2682 // neither master nor other workers should get here
2683 // (code gen does not generate this call in case 2: atomic reduce block)
2684 // actually it would be better to remove this else-if entirely;
2685 // after removal this value will be checked by the 'else' branch and will assert
2686
2687 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2688
2689 // only master gets here
2690
2691 } else {
2692
2693 // should never reach this block
2694 KMP_ASSERT( 0 ); // "unexpected method"
2695
2696 }
2697
2698 if ( __kmp_env_consistency_check )
2699 __kmp_pop_sync( global_tid, ct_reduce, loc );
2700
2701 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2702
2703 return;
2704}
2705
2706/* 2.a.ii. Reduce Block with a terminating barrier */
2707
2708/*!
2709@ingroup SYNCHRONIZATION
2710@param loc source location information
2711@param global_tid global thread number
2712@param num_vars number of items (variables) to be reduced
2713@param reduce_size size of data in bytes to be reduced
2714@param reduce_data pointer to data to be reduced
2715@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2716@param lck pointer to the unique lock data structure
2717@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2718
2719A blocking reduce that includes an implicit barrier.
2720*/
2721kmp_int32
2722__kmpc_reduce(
2723 ident_t *loc, kmp_int32 global_tid,
2724 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2725 void (*reduce_func)(void *lhs_data, void *rhs_data),
2726 kmp_critical_name *lck )
2727{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002728 KMP_COUNT_BLOCK(REDUCE_wait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002729 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002730 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2731
2732 KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2733
2734 // why do we need this initialization here at all?
2735 // Reduction clause cannot be a stand-alone directive.
2736
2737 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2738 // possible detection of false-positive race by the threadchecker ???
2739 if( ! TCR_4( __kmp_init_parallel ) )
2740 __kmp_parallel_initialize();
2741
2742 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002743#if KMP_USE_DYNAMIC_LOCK
2744 if ( __kmp_env_consistency_check )
2745 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2746#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002747 if ( __kmp_env_consistency_check )
2748 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002749#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002750
Jim Cownie5e8470a2013-09-27 10:38:44 +00002751 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2752 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2753
2754 if( packed_reduction_method == critical_reduce_block ) {
2755
2756 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2757 retval = 1;
2758
2759 } else if( packed_reduction_method == empty_reduce_block ) {
2760
2761 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2762 retval = 1;
2763
2764 } else if( packed_reduction_method == atomic_reduce_block ) {
2765
2766 retval = 2;
2767
2768 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2769
2770 //case tree_reduce_block:
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002771 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002772 // (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002773#if USE_ITT_NOTIFY
2774 __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2775#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002776 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2777 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2778
2779 // all other workers except master should do this pop here
2780 // ( none of other workers except master will enter __kmpc_end_reduce() )
2781 if ( __kmp_env_consistency_check ) {
2782 if( retval == 0 ) { // 0: all other workers; 1: master
2783 __kmp_pop_sync( global_tid, ct_reduce, loc );
2784 }
2785 }
2786
2787 } else {
2788
2789 // should never reach this block
2790 KMP_ASSERT( 0 ); // "unexpected method"
2791
2792 }
2793
2794 KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2795
2796 return retval;
2797}
2798
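/*
  Illustrative sketch only (kept out of the build): the blocking variant of the
  same lowering, using __kmpc_reduce/__kmpc_end_reduce so that the terminating
  barrier in the end routine is reached.  "reduce_cb" is the hypothetical
  callback sketched after __kmpc_end_reduce_nowait above, and
  __sync_fetch_and_add again stands in for the real atomic operation.
*/
#if 0
static void
reduce_blocking_example( ident_t *loc_example, int *sum, int local_sum )
{
    static kmp_critical_name reduce_lock;
    kmp_int32 gtid = __kmpc_global_thread_num( loc_example );

    switch ( __kmpc_reduce( loc_example, gtid, 1, sizeof( int ),
                            &local_sum, reduce_cb, &reduce_lock ) ) {
        case 1:  // combine directly, then run the implicit barrier via the end routine
            *sum += local_sum;
            __kmpc_end_reduce( loc_example, gtid, &reduce_lock );
            break;
        case 2:  // atomic method: combine atomically, then still reach the barrier
            __sync_fetch_and_add( sum, local_sum );
            __kmpc_end_reduce( loc_example, gtid, &reduce_lock );
            break;
        default: // 0: tree-reduce workers are released by the master's __kmpc_end_reduce()
            break;
    }
}
#endif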
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
*/
void
__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {

    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );

    packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );

    // this barrier should be visible to a customer and to the threading profile tool
    // (it's a terminating barrier on constructs if NOWAIT not specified)

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_end_critical_section_reduce_block( loc, global_tid, lck );

        // TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
        __kmp_threads[global_tid]->th.th_ident = loc;
#endif
        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( packed_reduction_method == empty_reduce_block ) {

        // usage: if team size == 1, no synchronization is required ( Intel platforms only )

        // TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
        __kmp_threads[global_tid]->th.th_ident = loc;
#endif
        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( packed_reduction_method == atomic_reduce_block ) {

        // TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
        __kmp_threads[global_tid]->th.th_ident = loc;
#endif
        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        // only master executes here (master releases all other workers)
        __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );

    } else {

        // should never reach this block
        KMP_ASSERT( 0 ); // "unexpected method"

    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_reduce, loc );

    KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );

    return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/

kmp_uint64
__kmpc_get_taskid() {

    kmp_int32 gtid;
    kmp_info_t * thread;

    gtid = __kmp_get_gtid();
    if ( gtid < 0 ) {
        return 0;
    }; // if
    thread = __kmp_thread_from_gtid( gtid );
    return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid


kmp_uint64
__kmpc_get_parent_taskid() {

    kmp_int32 gtid;
    kmp_info_t * thread;
    kmp_taskdata_t * parent_task;

    gtid = __kmp_get_gtid();
    if ( gtid < 0 ) {
        return 0;
    }; // if
    thread = __kmp_thread_from_gtid( gtid );
    parent_task = thread->th.th_current_task->td_parent;
    return ( parent_task == NULL ? 0 : parent_task->td_task_id );

} // __kmpc_get_parent_taskid

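/*
  Illustrative sketch only (kept out of the build): the two task-id queries
  above can be called from any thread and return 0 when the caller is not
  registered with the runtime (gtid < 0).  "log_current_task" is a hypothetical
  helper and assumes <stdio.h> for printf.
*/
#if 0
static void
log_current_task( void )
{
    kmp_uint64 task_id   = __kmpc_get_taskid();
    kmp_uint64 parent_id = __kmpc_get_parent_taskid();   // 0 if the current task has no parent

    printf( "task %llu (parent %llu)\n",
            (unsigned long long) task_id, (unsigned long long) parent_id );
}
#endif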
void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
{
    if ( ! __kmp_init_serial ) {
        __kmp_serial_initialize();
    }
    __kmp_place_num_sockets = nS;
    __kmp_place_socket_offset = sO;
    __kmp_place_num_cores = nC;
    __kmp_place_core_offset = cO;
    __kmp_place_num_threads_per_core = nT;
}

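/*
  Illustrative sketch only (kept out of the build): __kmpc_place_threads simply
  stores the requested topology slice in the __kmp_place_* globals (initializing
  the serial runtime first if needed), presumably for the affinity code to
  consume later.  The values below are hypothetical.
*/
#if 0
static void
place_threads_example( void )
{
    // request 2 sockets starting at socket 0, 4 cores per socket starting at
    // core 1, and 2 threads per core
    __kmpc_place_threads( 2 /*nS*/, 0 /*sO*/, 4 /*nC*/, 1 /*cO*/, 2 /*nT*/ );
}
#endif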
// end of file //