/*
 * kmp_csupport.c -- kfront linkage support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "omp.h"        /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_error.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* flags will be used in future, e.g., to implement */
/* openmp_strict library restrictions */

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 *
 */
void
__kmpc_begin(ident_t *loc, kmp_int32 flags)
{
    // By default __kmp_ignore_mppbeg() returns TRUE.
    if (__kmp_ignore_mppbeg() == FALSE) {
        __kmp_internal_begin();

        KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
    }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will not
 * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
 */
void
__kmpc_end(ident_t *loc)
{
    // By default, __kmp_ignore_mppend() returns TRUE, which makes __kmpc_end() a no-op.
    // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
    // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
    // will unregister this root (it can cause library shutdown).
    if (__kmp_ignore_mppend() == FALSE) {
        KC_TRACE( 10, ("__kmpc_end: called\n" ) );
        KA_TRACE( 30, ("__kmpc_end\n" ));

        __kmp_internal_end_thread( -1 );
    }
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).

*/
kmp_int32
__kmpc_global_thread_num(ident_t *loc)
{
    kmp_int32 gtid = __kmp_entry_gtid();

    KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );

    return gtid;
}
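
/*
 * Usage sketch (illustrative only, not part of the runtime): any thread,
 * OpenMP or not, may query its global index. The ident_t initializer below is
 * a hypothetical placeholder; real callers pass compiler-generated location
 * records.
 *
 *     static ident_t loc_example = { 0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" };
 *     kmp_int32 gtid = __kmpc_global_thread_num( &loc_example );
 *     printf( "running as global thread %d\n", (int)gtid );
 */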

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime.

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime. That is
not a number that can be determined by any OpenMP standard calls, since the library may be
called from more than one non-OpenMP thread, and this reflects the total over all such calls.
Similarly, since the runtime maintains underlying threads even when they are not active
(because the cost of creating and destroying OS threads is high), this call counts all such
threads even if they are not waiting for work.
*/
kmp_int32
__kmpc_global_num_threads(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );

    return TCR_4(__kmp_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel construct.

*/
kmp_int32
__kmpc_bound_thread_num(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
    return __kmp_tid_from_gtid( __kmp_entry_gtid() );
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32
__kmpc_bound_num_threads(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );

    return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32
__kmpc_ok_to_fork(ident_t *loc)
{
#ifndef KMP_DEBUG

    return TRUE;

#else

    const char *semi2;
    const char *semi3;
    int line_no;

    if (__kmp_par_range == 0) {
        return TRUE;
    }
    semi2 = loc->psource;
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2 + 1, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    if (__kmp_par_range_filename[0]) {
        const char *name = semi2 - 1;
        while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
            name--;
        }
        if ((*name == '/') || (*name == ';')) {
            name++;
        }
        if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
            return __kmp_par_range < 0;
        }
    }
    semi3 = strchr(semi2 + 1, ';');
    if (__kmp_par_range_routine[0]) {
        if ((semi3 != NULL) && (semi3 > semi2)
          && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
            return __kmp_par_range < 0;
        }
    }
    if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
        if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
            return __kmp_par_range > 0;
        }
        return __kmp_par_range < 0;
    }
    return TRUE;

#endif /* KMP_DEBUG */

}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if not.
*/
kmp_int32
__kmpc_in_parallel( ident_t *loc )
{
    return __kmp_entry_thread() -> th.th_root -> r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void
__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
      global_tid, num_threads ) );

    __kmp_push_num_threads( loc, global_tid, num_threads );
}
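
/*
 * Lowering sketch (illustrative): for a region such as
 *
 *     #pragma omp parallel num_threads(4)
 *
 * a compiler would typically emit something like the following, where
 * "outlined" is a hypothetical generated microtask:
 *
 *     kmp_int32 gtid = __kmpc_global_thread_num( &loc );
 *     __kmpc_push_num_threads( &loc, gtid, 4 );
 *     __kmpc_fork_call( &loc, 0, (kmpc_micro)outlined );
 *
 * The pushed value applies only to the next fork by this thread.
 */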

void
__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
{
    KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );

    /* the num_threads are automatically popped */
}


#if OMP_40_ENABLED

void
__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
{
    KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
      global_tid, proc_bind ) );

    __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
}

#endif /* OMP_40_ENABLED */


/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void
__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
    int inParallel = __kmpc_in_parallel(loc);
    if (inParallel)
    {
        KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
    }
    else
    {
        KMP_STOP_EXPLICIT_TIMER(OMP_serial);
        KMP_COUNT_BLOCK(OMP_PARALLEL);
    }
#endif

    // maybe to save thr_state is enough here
    {
        va_list ap;
        va_start( ap, microtask );

#if OMPT_SUPPORT
        int tid = __kmp_tid_from_gtid( gtid );
        kmp_info_t *master_th = __kmp_threads[ gtid ];
        kmp_team_t *parent_team = master_th->th.th_team;
        if (ompt_enabled) {
            parent_team->t.t_implicit_task_taskdata[tid].
                ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
        }
#endif

#if INCLUDE_SSC_MARKS
        SSC_MARK_FORKING();
#endif
        __kmp_fork_call( loc, gtid, fork_context_intel,
                argc,
#if OMPT_SUPPORT
                VOLATILE_CAST(void *) microtask,      // "unwrapped" task
#endif
                VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                VOLATILE_CAST(launch_t)    __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                &ap
#else
                ap
#endif
                );
#if INCLUDE_SSC_MARKS
        SSC_MARK_JOINING();
#endif
        __kmp_join_call( loc, gtid
#if OMPT_SUPPORT
            , fork_context_intel
#endif
        );

        va_end( ap );

#if OMPT_SUPPORT
        if (ompt_enabled) {
            parent_team->t.t_implicit_task_taskdata[tid].
                ompt_task_info.frame.reenter_runtime_frame = 0;
        }
#endif
    }
#if (KMP_STATS_ENABLED)
    if (!inParallel)
        KMP_START_EXPLICIT_TIMER(OMP_serial);
#endif
}
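
/*
 * Lowering sketch (illustrative): for
 *
 *     #pragma omp parallel shared(a)
 *
 * the compiler outlines the body into a microtask and forwards the shared
 * variable's address through the ellipsis; the names here are hypothetical:
 *
 *     void outlined( kmp_int32 *gtid, kmp_int32 *btid, int *a ) { ... }
 *
 *     __kmpc_fork_call( &loc, 1, (kmpc_micro)outlined, &a );
 *
 * Every thread in the team then invokes "outlined" with its own gtid/btid and
 * the same &a.
 */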

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void
__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
      global_tid, num_teams, num_threads ) );

    __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void
__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    int gtid = __kmp_entry_gtid();
    kmp_info_t *this_thr = __kmp_threads[ gtid ];
    va_list ap;
    va_start( ap, microtask );

    KMP_COUNT_BLOCK(OMP_TEAMS);

    // remember teams entry point and nesting level
    this_thr->th.th_teams_microtask = microtask;
    this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
    kmp_team_t *parent_team = this_thr->th.th_team;
    int tid = __kmp_tid_from_gtid( gtid );
    if (ompt_enabled) {
        parent_team->t.t_implicit_task_taskdata[tid].
            ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
    }
#endif

    // check if __kmpc_push_num_teams called, set default number of teams otherwise
    if ( this_thr->th.th_teams_size.nteams == 0 ) {
        __kmp_push_num_teams( loc, gtid, 0, 0 );
    }
    KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

    __kmp_fork_call( loc, gtid, fork_context_intel,
            argc,
#if OMPT_SUPPORT
            VOLATILE_CAST(void *) microtask,               // "unwrapped" task
#endif
            VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
            VOLATILE_CAST(launch_t)    __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            &ap
#else
            ap
#endif
            );
    __kmp_join_call( loc, gtid
#if OMPT_SUPPORT
        , fork_context_intel
#endif
    );

#if OMPT_SUPPORT
    if (ompt_enabled) {
        parent_team->t.t_implicit_task_taskdata[tid].
            ompt_task_info.frame.reenter_runtime_frame = NULL;
    }
#endif

    this_thr->th.th_teams_microtask = NULL;
    this_thr->th.th_teams_level = 0;
    *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
    va_end( ap );
}
#endif /* OMP_40_ENABLED */
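
/*
 * Lowering sketch (illustrative): for
 *
 *     #pragma omp teams num_teams(2) thread_limit(8)
 *
 * a compiler would typically emit, with "teams_outlined" a hypothetical
 * generated microtask:
 *
 *     kmp_int32 gtid = __kmpc_global_thread_num( &loc );
 *     __kmpc_push_num_teams( &loc, gtid, 2, 8 );
 *     __kmpc_fork_teams( &loc, 0, (kmpc_micro)teams_outlined );
 */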


//
// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied.  I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
//
int
__kmpc_invoke_task_func( int gtid )
{
    return __kmp_invoke_task_func( gtid );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
                                                 * kmp_fork_call since the tasks to be done are similar in each case.
                                                 */
}
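
/*
 * Lowering sketch (illustrative): for
 *
 *     #pragma omp parallel if (cond)
 *
 * the compiler branches between a real fork and the serialized pair; names are
 * hypothetical:
 *
 *     if ( cond ) {
 *         __kmpc_fork_call( &loc, 0, (kmpc_micro)outlined );
 *     } else {
 *         kmp_int32 gtid = __kmpc_global_thread_num( &loc );
 *         kmp_int32 zero = 0;
 *         __kmpc_serialized_parallel( &loc, gtid );
 *         outlined( &gtid, &zero );                     // body runs on one thread
 *         __kmpc_end_serialized_parallel( &loc, gtid );
 *     }
 */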

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void
__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_internal_control_t *top;
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );

    /* skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    // Not autopar code
    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    #if OMP_41_ENABLED
    kmp_task_team_t * task_team = this_thr->th.th_task_team;

    // we need to wait for the proxy tasks before finishing the thread
    if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
        __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here?
    #endif

    KMP_MB();
    KMP_DEBUG_ASSERT( serial_team );
    KMP_ASSERT(       serial_team -> t.t_serialized );
    KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
    KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );

    /* If necessary, pop the internal control stack values and replace the team values */
    top = serial_team -> t.t_control_stack_top;
    if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
        copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
        serial_team -> t.t_control_stack_top = top -> next;
        __kmp_free(top);
    }

    //if( serial_team -> t.t_serialized > 1 )
    serial_team -> t.t_level--;

    /* pop dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
    {
        dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer =
            serial_team->t.t_dispatch->th_disp_buffer->next;
        __kmp_free( disp_buffer );
    }

    -- serial_team -> t.t_serialized;
    if ( serial_team -> t.t_serialized == 0 ) {

        /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
        if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
            __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
        }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

        this_thr -> th.th_team           = serial_team -> t.t_parent;
        this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;

        /* restore values cached in the thread */
        this_thr -> th.th_team_nproc      = serial_team -> t.t_parent -> t.t_nproc;      /* JPH */
        this_thr -> th.th_team_master     = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
        this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;

        /* TODO the below shouldn't need to be adjusted for serialized teams */
        this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
            t.t_dispatch[ serial_team -> t.t_master_tid ];

        __kmp_pop_current_task_from_thread( this_thr );

        KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
        this_thr -> th.th_current_task -> td_flags.executing = 1;

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            // Copy the task team from the new child / old parent team to the thread.
            this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
                            global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
        }
    } else {
        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
                            global_tid, serial_team, serial_team -> t.t_serialized ) );
        }
    }

#if USE_ITT_BUILD
    kmp_uint64 cur_time = 0;
#if USE_ITT_NOTIFY
    if ( __itt_get_timestamp_ptr ) {
        cur_time = __itt_get_timestamp();
    }
#endif /* USE_ITT_NOTIFY */
    if ( this_thr->th.th_team->t.t_level == 0
#if OMP_40_ENABLED
        && this_thr->th.th_teams_microtask == NULL
#endif
    ) {
        // Report the barrier
        this_thr->th.th_ident = loc;
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
            ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
        {
            __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
                                    cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
            if ( __kmp_forkjoin_frames_mode == 3 )
                // Since the barrier frame for a serialized region is equal to the region itself,
                // we use the same begin timestamp as for the barrier.
                __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
                                        cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
        } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
            ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
            // Mark the end of the "parallel" region for VTune. Only use one of the frame
            // notification schemes at the moment.
            __kmp_itt_region_joined( global_tid, 1 );
    }
#endif /* USE_ITT_BUILD */

    if ( __kmp_env_consistency_check )
        __kmp_pop_parallel( global_tid, NULL );
}

/*!
@ingroup SYNCHRONIZATION
@param loc  source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void
__kmpc_flush(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_flush: called\n" ) );

    /* need explicit __mf() here since use volatile instead in library */
    KMP_MB();       /* Flush all pending memory write invalidates. */

    #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
        #if KMP_MIC
            // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
            // We shouldn't need it, though, since the ABI rules require that
            // * If the compiler generates NGO stores it also generates the fence
            // * If users hand-code NGO stores they should insert the fence
            // therefore no incomplete unordered stores should be visible.
        #else
            // C74404
            // This is to address non-temporal store instructions (sfence needed).
            // The clflush instruction is also addressed (mfence needed).
            // Probably the non-temporal load movntdqa instruction should also be addressed.
            // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
            if ( ! __kmp_cpuinfo.initialized ) {
                __kmp_query_cpuid( & __kmp_cpuinfo );
            }; // if
            if ( ! __kmp_cpuinfo.sse2 ) {
                // CPU cannot execute SSE2 instructions.
            } else {
                #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
                _mm_mfence();
                #else
                __sync_synchronize();
                #endif // KMP_COMPILER_ICC
            }; // if
        #endif // KMP_MIC
    #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        // Nothing to see here move along
    #elif KMP_ARCH_PPC64
        // Nothing needed here (we have a real MB above).
        #if KMP_OS_CNK
            // The flushing thread needs to yield here; this prevents a
            // busy-waiting thread from saturating the pipeline. flush is
            // often used in loops like this:
            //   while (!flag) {
            //     #pragma omp flush(flag)
            //   }
            // and adding the yield here is good for at least a 10x speedup
            // when running >2 threads per core (on the NAS LU benchmark).
            __kmp_yield(TRUE);
        #endif
    #else
        #error Unknown or unsupported architecture
    #endif

}
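
/*
 * Lowering sketch (illustrative): a bare
 *
 *     #pragma omp flush
 *
 * simply becomes
 *
 *     __kmpc_flush( &loc );
 *
 * which, on x86/x86_64 with SSE2 available, amounts to a full memory fence as
 * shown above.
 */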

/* -------------------------------------------------------------------------- */

/* -------------------------------------------------------------------------- */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void
__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_BARRIER);
    KMP_TIME_BLOCK(OMP_barrier);
    KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {
        if ( loc == 0 ) {
            KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
        }; // if

        __kmp_check_barrier( global_tid, ct_barrier, loc );
    }

    __kmp_threads[ global_tid ]->th.th_ident = loc;
    // TODO: explicit barrier_wait_id:
    //   this function is called when 'barrier' directive is present or
    //   implicit barrier at the end of a worksharing construct.
    // 1) better to add a per-thread barrier counter to a thread data structure
    // 2) set to 0 when a new team is created
    // 4) no sync is required

    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
}
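
/*
 * Lowering sketch (illustrative): an explicit
 *
 *     #pragma omp barrier
 *
 * (or the implicit barrier that closes a worksharing construct) becomes
 *
 *     __kmpc_barrier( &loc, gtid );
 *
 * where gtid is the caller's global thread index.
 */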

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param global_tid  global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32
__kmpc_master(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_MASTER);
    int status = 0;

    KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    if( KMP_MASTER_GTID( global_tid )) {
        KMP_START_EXPLICIT_TIMER(OMP_master);
        status = 1;
    }

#if OMPT_SUPPORT && OMPT_TRACE
    if (status) {
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
            kmp_info_t *this_thr = __kmp_threads[ global_tid ];
            kmp_team_t *team     = this_thr -> th.th_team;

            int tid = __kmp_tid_from_gtid( global_tid );
            ompt_callbacks.ompt_callback(ompt_event_master_begin)(
                team->t.ompt_team_info.parallel_id,
                team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
        }
    }
#endif

    if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
#else
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL );
#endif
    }

    return status;
}

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param global_tid  global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the thread
that executes the <tt>master</tt> region.
*/
void
__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
{
    KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );

    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
    KMP_STOP_EXPLICIT_TIMER(OMP_master);

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team     = this_thr -> th.th_team;
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_master_end)) {
        int tid = __kmp_tid_from_gtid( global_tid );
        ompt_callbacks.ompt_callback(ompt_event_master_end)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif

    if ( __kmp_env_consistency_check ) {
        if( global_tid < 0 )
            KMP_WARNING( ThreadIdentInvalid );

        if( KMP_MASTER_GTID( global_tid ))
            __kmp_pop_sync( global_tid, ct_master, loc );
    }
}
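
/*
 * Lowering sketch (illustrative): for
 *
 *     #pragma omp master
 *     { body(); }
 *
 * the compiler guards the body with the begin/end pair:
 *
 *     if ( __kmpc_master( &loc, gtid ) ) {
 *         body();
 *         __kmpc_end_master( &loc, gtid );
 *     }
 *
 * Only the team master sees a non-zero return, so only it runs the body.
 */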

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param gtid  global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

#if USE_ITT_BUILD
    __kmp_itt_ordered_prep( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        /* OMPT state update */
        th->th.ompt_thread_info.wait_id = (uint64_t) loc;
        th->th.ompt_thread_info.state = ompt_state_wait_ordered;

        /* OMPT event callback */
        if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

    if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
        (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_deo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        /* OMPT state update */
        th->th.ompt_thread_info.state = ompt_state_work_parallel;
        th->th.ompt_thread_info.wait_id = 0;

        /* OMPT event callback */
        if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

#if USE_ITT_BUILD
    __kmp_itt_ordered_start( gtid );
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param gtid  global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;

    KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );

#if USE_ITT_BUILD
    __kmp_itt_ordered_end( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

    if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
        (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_dxo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
            th->th.ompt_thread_info.wait_id);
    }
#endif
}
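
/*
 * Lowering sketch (illustrative): inside an ordered loop,
 *
 *     #pragma omp ordered
 *     { body(); }
 *
 * becomes a bracketed pair that serializes the bodies in iteration order:
 *
 *     __kmpc_ordered( &loc, gtid );
 *     body();
 *     __kmpc_end_ordered( &loc, gtid );
 */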

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
{
    // Pointer to the allocated indirect lock is written to crit, while indexing is ignored.
    void *idx;
    kmp_indirect_lock_t **lck;
    lck = (kmp_indirect_lock_t **)crit;
    kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
    KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
    KMP_SET_I_LOCK_LOCATION(ilk, loc);
    KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
    KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(ilk->lock, loc);
#endif
    int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
    if (status == 0) {
#if USE_ITT_BUILD
        __kmp_itt_critical_destroyed(ilk->lock);
#endif
        // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit.
        //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
    }
    KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) {                                                      \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                                 \
    if (l->lk.poll != KMP_LOCK_FREE(tas) ||                                                     \
        ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
        kmp_uint32 spins;                                                                       \
        KMP_FSYNC_PREPARE(l);                                                                   \
        KMP_INIT_YIELD(spins);                                                                  \
        if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {           \
            KMP_YIELD(TRUE);                                                                    \
        } else {                                                                                \
            KMP_YIELD_SPIN(spins);                                                              \
        }                                                                                       \
        while (l->lk.poll != KMP_LOCK_FREE(tas) ||                                              \
               ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
            if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {       \
                KMP_YIELD(TRUE);                                                                \
            } else {                                                                            \
                KMP_YIELD_SPIN(spins);                                                          \
            }                                                                                   \
        }                                                                                       \
    }                                                                                           \
    KMP_FSYNC_ACQUIRED(l);                                                                      \
}

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) {                                                     \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                                 \
    rc = l->lk.poll == KMP_LOCK_FREE(tas) &&                                                    \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
}

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) {                                                      \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas));                               \
    KMP_MB();                                                                                   \
}

#if KMP_USE_FUTEX

# include <unistd.h>
# include <sys/syscall.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE 1
# endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) {                                                    \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                           \
    kmp_int32 gtid_code = (gtid+1) << 1;                                                        \
    KMP_MB();                                                                                   \
    KMP_FSYNC_PREPARE(ftx);                                                                     \
    kmp_int32 poll_val;                                                                         \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),       \
                                                   KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                                          \
        if (!cond) {                                                                            \
            if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
                continue;                                                                       \
            }                                                                                   \
            poll_val |= KMP_LOCK_BUSY(1, futex);                                                \
        }                                                                                       \
        kmp_int32 rc;                                                                           \
        if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
            continue;                                                                           \
        }                                                                                       \
        gtid_code |= 1;                                                                         \
    }                                                                                           \
    KMP_FSYNC_ACQUIRED(ftx);                                                                    \
}

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) {                                                   \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                           \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1)) { \
        KMP_FSYNC_ACQUIRED(ftx);                                                                \
        rc = TRUE;                                                                              \
    } else {                                                                                    \
        rc = FALSE;                                                                             \
    }                                                                                           \
}

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) {                                                    \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                           \
    KMP_MB();                                                                                   \
    KMP_FSYNC_RELEASING(ftx);                                                                   \
    kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                                         \
        syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    }                                                                                           \
    KMP_MB();                                                                                   \
    KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));          \
}

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p
__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
{
    kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

    //
    // Because of the double-check, the following load
    // doesn't need to be volatile.
    //
    kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );

    if ( lck == NULL ) {
        void * idx;

        // Allocate & initialize the lock.
        // Remember allocated locks in table in order to free them in __kmp_cleanup()
        lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
        __kmp_init_user_lock_with_checks( lck );
        __kmp_set_user_lock_location( lck, loc );
#if USE_ITT_BUILD
        __kmp_itt_critical_creating( lck );
        // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
        // lock. It is the only place where we can guarantee it. There are chances the lock will be
        // destroyed with no usage, but it is not a problem, because this is not a real event seen
        // by the user but rather setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

        //
        // Use a cmpxchg instruction to slam the start of the critical
        // section with the lock pointer.  If another thread beat us
        // to it, deallocate the lock, and use the lock that the other
        // thread allocated.
        //
        int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );

        if ( status == 0 ) {
            // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
            __kmp_itt_critical_destroyed( lck );
            // Let ITT know the lock is destroyed and the same memory location may be reused for
            // another purpose.
#endif /* USE_ITT_BUILD */
            __kmp_destroy_user_lock_with_checks( lck );
            __kmp_user_lock_free( &idx, gtid, lck );
            lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
            KMP_DEBUG_ASSERT( lck != NULL );
        }
    }
    return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param global_tid  global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void
__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
{
#if KMP_USE_DYNAMIC_LOCK
    __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
    KMP_COUNT_BLOCK(OMP_CRITICAL);
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

    //TODO: add THR_OVHD_STATE

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    /* since the critical directive binds to all threads, not just
     * the current team we have to check this even if we are in a
     * serialized team */
    /* also, even if we are the uber thread, we still have to conduct the lock,
     * as we have to contend with sibling threads */

#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_acquire_user_lock_with_checks( lck, global_tid );

#if USE_ITT_BUILD
    __kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */

    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
#endif // KMP_USE_DYNAMIC_LOCK
}
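
/*
 * Lowering sketch (illustrative): for
 *
 *     #pragma omp critical (name)
 *     { body(); }
 *
 * the compiler materializes one static kmp_critical_name per critical name and
 * brackets the body; "crit_name" is hypothetical:
 *
 *     static kmp_critical_name crit_name = { 0 };
 *
 *     __kmpc_critical( &loc, gtid, &crit_name );
 *     body();
 *     __kmpc_end_critical( &loc, gtid, &crit_name );
 */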

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t
__kmp_map_hint_to_lock(uintptr_t hint)
{
#if KMP_USE_TSX
# define KMP_TSX_LOCK(seq) lockseq_##seq
#else
# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif
    // Hints that do not require further logic
    if (hint & kmp_lock_hint_hle)
        return KMP_TSX_LOCK(hle);
    if (hint & kmp_lock_hint_rtm)
        return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
    if (hint & kmp_lock_hint_adaptive)
        return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;

    // Rule out conflicting hints first by returning the default lock
    if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
        return __kmp_user_lock_seq;
    if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
        return __kmp_user_lock_seq;

    // Do not even consider speculation when it appears to be contended
    if (hint & omp_lock_hint_contended)
        return lockseq_queuing;

    // Uncontended lock without speculation
    if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
        return lockseq_tas;

    // HLE lock for speculation
    if (hint & omp_lock_hint_speculative)
        return KMP_TSX_LOCK(hle);

    return __kmp_user_lock_seq;
}

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param global_tid  global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section,
or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation.
This function blocks until the executing thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void
__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
{
    KMP_COUNT_BLOCK(OMP_CRITICAL);
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

    kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
    // Check if it is initialized.
    if (*lk == 0) {
        kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
        if (KMP_IS_D_LOCK(lckseq)) {
            KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
        } else {
            __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
        }
    }
    // Branch for accessing the actual lock object and set operation. This branching is inevitable since
    // this lock initialization does not follow the normal dispatch path (lock table is not used).
    if (KMP_EXTRACT_D_TAG(lk) != 0) {
        lck = (kmp_user_lock_p)lk;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
# if KMP_USE_INLINED_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_INLINED_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
        KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
    }

#if USE_ITT_BUILD
    __kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */

    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK
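
/*
 * Lowering sketch (illustrative): with the OpenMP 4.5 hint clause,
 *
 *     #pragma omp critical (name) hint(omp_lock_hint_speculative)
 *     { body(); }
 *
 * the compiler passes the hint so the runtime can pick a speculative (HLE/RTM)
 * lock when the hardware supports it; "crit_name" is hypothetical:
 *
 *     __kmpc_critical_with_hint( &loc, gtid, &crit_name, omp_lock_hint_speculative );
 *     body();
 *     __kmpc_end_critical( &loc, gtid, &crit_name );
 */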

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param global_tid  global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void
__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
{
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));

#if KMP_USE_DYNAMIC_LOCK
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        KMP_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
# if KMP_USE_INLINED_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_RELEASE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_INLINED_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        KMP_ASSERT(ilk != NULL);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
        KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
    }

    KMP_ASSERT(lck != NULL);

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

#if USE_ITT_BUILD
    __kmp_itt_critical_releasing( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_release_user_lock_with_checks( lck, global_tid );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
        ompt_callbacks.ompt_callback(ompt_event_release_critical)(
            (uint64_t) lck);
    }
#endif

#endif // KMP_USE_DYNAMIC_LOCK

    KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside this function.
*/
kmp_int32
__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
{
    int status;

    KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check )
        __kmp_check_barrier( global_tid, ct_barrier, loc );

#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );

    return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void
__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
{
    KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));

    __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
}
1411
1412/*!
1413@ingroup SYNCHRONIZATION
1414@param loc source location information
1415@param global_tid thread id.
1416@return one if the thread should execute the master block, zero otherwise
1417
1418Start execution of a combined barrier and master(nowait) construct.
1419The barrier is executed inside this function.
1420There is no equivalent "end" function: the master block carries a nowait, so the other threads do not wait for it to complete, and no releasing barrier (and hence no "end" call) is needed.
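For illustration (a sketch only; <tt>foo_loc</tt> and <tt>gtid</tt> assumed in scope):
@code
if ( __kmpc_barrier_master_nowait( &foo_loc, gtid ) ) {
    /* master-only code; no "end" call and no releasing barrier follow */
}
@endcode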
1421*/
1422kmp_int32
1423__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1424{
1425 kmp_int32 ret;
1426
1427 KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1428
1429 if (! TCR_4(__kmp_init_parallel))
1430 __kmp_parallel_initialize();
1431
1432 if ( __kmp_env_consistency_check ) {
1433 if ( loc == 0 ) {
1434 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1435 }
1436 __kmp_check_barrier( global_tid, ct_barrier, loc );
1437 }
1438
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001439#if USE_ITT_NOTIFY
1440 __kmp_threads[global_tid]->th.th_ident = loc;
1441#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001442 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1443
1444 ret = __kmpc_master (loc, global_tid);
1445
1446 if ( __kmp_env_consistency_check ) {
1447 /* there's no __kmpc_end_master called; so the (stats) */
1448 /* actions of __kmpc_end_master are done here */
1449
1450 if ( global_tid < 0 ) {
1451 KMP_WARNING( ThreadIdentInvalid );
1452 }
1453 if (ret) {
1454 /* only one thread should do the pop since only */
1455 /* one did the push (see __kmpc_master()) */
1456
1457 __kmp_pop_sync( global_tid, ct_master, loc );
1458 }
1459 }
1460
1461 return (ret);
1462}
1463
1464/* The BARRIER for a SINGLE process section is always explicit */
1465/*!
1466@ingroup WORK_SHARING
1467@param loc source location information
1468@param global_tid global thread number
1469@return One if this thread should execute the single construct, zero otherwise.
1470
1471Test whether to execute a <tt>single</tt> construct.
1472There are no implicit barriers in the two "single" calls; rather, the compiler should
1473introduce an explicit barrier if one is required.
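For illustration, a compiler might lower a <tt>single</tt> construct without a
nowait clause as follows (a sketch only; <tt>foo_loc</tt> and <tt>gtid</tt>
assumed in scope):
@code
if ( __kmpc_single( &foo_loc, gtid ) ) {
    /* code executed by the chosen thread only */
    __kmpc_end_single( &foo_loc, gtid );
}
__kmpc_barrier( &foo_loc, gtid ); /* the explicit barrier introduced by the compiler */
@endcode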
1474*/
1475
1476kmp_int32
1477__kmpc_single(ident_t *loc, kmp_int32 global_tid)
1478{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001479 KMP_COUNT_BLOCK(OMP_SINGLE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001480 kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
Jonathan Peyton45be4502015-08-11 21:36:41 +00001481 if(rc == TRUE) {
1482 KMP_START_EXPLICIT_TIMER(OMP_single);
1483 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001484
1485#if OMPT_SUPPORT && OMPT_TRACE
1486 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1487 kmp_team_t *team = this_thr -> th.th_team;
1488 int tid = __kmp_tid_from_gtid( global_tid );
1489
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001490 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001491 if (rc) {
1492 if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
1493 ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
1494 team->t.ompt_team_info.parallel_id,
1495 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
1496 team->t.ompt_team_info.microtask);
1497 }
1498 } else {
1499 if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
1500 ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
1501 team->t.ompt_team_info.parallel_id,
1502 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1503 }
1504 this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
1505 }
1506 }
1507#endif
1508
Jim Cownie5e8470a2013-09-27 10:38:44 +00001509 return rc;
1510}
1511
1512/*!
1513@ingroup WORK_SHARING
1514@param loc source location information
1515@param global_tid global thread number
1516
1517Mark the end of a <tt>single</tt> construct. This function should
1518only be called by the thread that executed the block of code protected
1519by the `single` construct.
1520*/
1521void
1522__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1523{
1524 __kmp_exit_single( global_tid );
Jonathan Peyton45be4502015-08-11 21:36:41 +00001525 KMP_STOP_EXPLICIT_TIMER(OMP_single);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001526
1527#if OMPT_SUPPORT && OMPT_TRACE
1528 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1529 kmp_team_t *team = this_thr -> th.th_team;
1530 int tid = __kmp_tid_from_gtid( global_tid );
1531
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001532 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001533 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
1534 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
1535 team->t.ompt_team_info.parallel_id,
1536 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1537 }
1538#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001539}
1540
1541/*!
1542@ingroup WORK_SHARING
1543@param loc Source location
1544@param global_tid Global thread id
1545
1546Mark the end of a statically scheduled loop.
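For illustration, this call pairs with a static-loop initializer such as
__kmpc_for_static_init_4 (a sketch only; the exact initializer variant and
its arguments depend on the loop's induction type, schedule and chunk):
@code
kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
__kmpc_for_static_init_4( &foo_loc, gtid, kmp_sch_static, &last,
                          &lower, &upper, &stride, 1, 1 );
for ( kmp_int32 i = lower; i <= upper; ++i )
    ; /* loop body */
__kmpc_for_static_fini( &foo_loc, gtid );
@endcode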
1547*/
1548void
1549__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1550{
1551 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1552
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001553#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001554 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001555 ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
Jonathan Peytonf0344bb2015-10-09 17:42:52 +00001556 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1557 kmp_team_t *team = this_thr -> th.th_team;
1558 int tid = __kmp_tid_from_gtid( global_tid );
1559
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001560 ompt_callbacks.ompt_callback(ompt_event_loop_end)(
1561 team->t.ompt_team_info.parallel_id,
1562 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1563 }
1564#endif
1565
Jim Cownie5e8470a2013-09-27 10:38:44 +00001566 if ( __kmp_env_consistency_check )
1567 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1568}
1569
1570/*
 1571 * User routines which take C-style arguments (call by value),
 1572 * as distinct from the Fortran equivalent routines (call by reference)
1573 */
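/*
 * For illustration (a sketch; the Fortran entry name shown is only an
 * assumption about the usual underscore mangling, not a definitive interface):
 *
 *   C front end:       ompc_set_num_threads( 4 );   // argument passed by value
 *   Fortran front end: omp_set_num_threads_( &n );  // argument passed by reference
 */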
1574
1575void
1576ompc_set_num_threads( int arg )
1577{
1578// !!!!! TODO: check the per-task binding
1579 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1580}
1581
1582void
1583ompc_set_dynamic( int flag )
1584{
1585 kmp_info_t *thread;
1586
1587 /* For the thread-private implementation of the internal controls */
1588 thread = __kmp_entry_thread();
1589
1590 __kmp_save_internal_controls( thread );
1591
1592 set__dynamic( thread, flag ? TRUE : FALSE );
1593}
1594
1595void
1596ompc_set_nested( int flag )
1597{
1598 kmp_info_t *thread;
1599
1600 /* For the thread-private internal controls implementation */
1601 thread = __kmp_entry_thread();
1602
1603 __kmp_save_internal_controls( thread );
1604
1605 set__nested( thread, flag ? TRUE : FALSE );
1606}
1607
Jim Cownie5e8470a2013-09-27 10:38:44 +00001608void
1609ompc_set_max_active_levels( int max_active_levels )
1610{
1611 /* TO DO */
1612 /* we want per-task implementation of this internal control */
1613
1614 /* For the per-thread internal controls implementation */
1615 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1616}
1617
1618void
1619ompc_set_schedule( omp_sched_t kind, int modifier )
1620{
1621// !!!!! TODO: check the per-task binding
1622 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1623}
1624
1625int
1626ompc_get_ancestor_thread_num( int level )
1627{
1628 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1629}
1630
1631int
1632ompc_get_team_size( int level )
1633{
1634 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1635}
1636
Jim Cownie5e8470a2013-09-27 10:38:44 +00001637void
1638kmpc_set_stacksize( int arg )
1639{
1640 // __kmp_aux_set_stacksize initializes the library if needed
1641 __kmp_aux_set_stacksize( arg );
1642}
1643
1644void
1645kmpc_set_stacksize_s( size_t arg )
1646{
1647 // __kmp_aux_set_stacksize initializes the library if needed
1648 __kmp_aux_set_stacksize( arg );
1649}
1650
1651void
1652kmpc_set_blocktime( int arg )
1653{
1654 int gtid, tid;
1655 kmp_info_t *thread;
1656
1657 gtid = __kmp_entry_gtid();
1658 tid = __kmp_tid_from_gtid(gtid);
1659 thread = __kmp_thread_from_gtid(gtid);
1660
1661 __kmp_aux_set_blocktime( arg, thread, tid );
1662}
1663
1664void
1665kmpc_set_library( int arg )
1666{
1667 // __kmp_user_set_library initializes the library if needed
1668 __kmp_user_set_library( (enum library_type)arg );
1669}
1670
1671void
1672kmpc_set_defaults( char const * str )
1673{
1674 // __kmp_aux_set_defaults initializes the library if needed
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001675 __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001676}
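/*
 * For illustration (a sketch; a single "NAME=value" settings string is
 * assumed, in the same form the environment-variable parser accepts):
 *
 *   kmpc_set_defaults( "KMP_BLOCKTIME=0" );
 */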
1677
Jim Cownie5e8470a2013-09-27 10:38:44 +00001678int
1679kmpc_set_affinity_mask_proc( int proc, void **mask )
1680{
Alp Toker98758b02014-03-02 04:12:06 +00001681#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001682 return -1;
1683#else
1684 if ( ! TCR_4(__kmp_init_middle) ) {
1685 __kmp_middle_initialize();
1686 }
1687 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1688#endif
1689}
1690
1691int
1692kmpc_unset_affinity_mask_proc( int proc, void **mask )
1693{
Alp Toker98758b02014-03-02 04:12:06 +00001694#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 return -1;
1696#else
1697 if ( ! TCR_4(__kmp_init_middle) ) {
1698 __kmp_middle_initialize();
1699 }
1700 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1701#endif
1702}
1703
1704int
1705kmpc_get_affinity_mask_proc( int proc, void **mask )
1706{
Alp Toker98758b02014-03-02 04:12:06 +00001707#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001708 return -1;
1709#else
1710 if ( ! TCR_4(__kmp_init_middle) ) {
1711 __kmp_middle_initialize();
1712 }
1713 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1714#endif
1715}
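/*
 * For illustration, these entries back the documented kmp_* affinity API
 * (a sketch only; error checking omitted):
 *
 *   kmp_affinity_mask_t mask;
 *   kmp_create_affinity_mask( &mask );
 *   kmp_set_affinity_mask_proc( 0, &mask );
 *   kmp_set_affinity( &mask );
 *   kmp_destroy_affinity_mask( &mask );
 */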
1716
Jim Cownie5e8470a2013-09-27 10:38:44 +00001717
1718/* -------------------------------------------------------------------------- */
1719/*!
1720@ingroup THREADPRIVATE
1721@param loc source location information
1722@param gtid global thread number
1723@param cpy_size size of the cpy_data buffer
1724@param cpy_data pointer to data to be copied
1725@param cpy_func helper function to call for copying data
1726@param didit flag variable: 1=single thread; 0=not single thread
1727
1728__kmpc_copyprivate implements the interface for the private data broadcast needed for
1729the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1730All threads participating in the parallel region call this routine.
1731One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1732and all other threads should have that variable set to 0.
1733All threads pass a pointer to a data buffer (cpy_data) that they have built.
1734
1735The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1736clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1737race conditions, so the code generation for the single region should avoid generating a barrier
1738after the call to @ref __kmpc_copyprivate.
1739
1740The <tt>gtid</tt> parameter is the global thread id for the current thread.
1741The <tt>loc</tt> parameter is a pointer to source location information.
1742
1743Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1744to a team-private location, then the other threads will each call the function pointed to by
1745the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer.
1746
1747The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1748and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1749the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1750to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1751
1752The interface to cpy_func is as follows:
1753@code
1754void cpy_func( void *destination, void *source )
1755@endcode
1756where void *destination is the cpy_data pointer for the thread being copied to
1757and void *source is the cpy_data pointer for the thread being copied from.
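For illustration, for a <tt>copyprivate(x)</tt> clause on an <tt>int x</tt>,
each thread's cpy_data buffer could simply hold the address of its own
<tt>x</tt>, and the helper could be written as below (a sketch only, not the
code any particular compiler emits):
@code
void cpy_func( void *destination, void *source )
{
    /* each buffer holds one pointer: the address of that thread's x */
    **(int **)destination = **(int **)source;
}
@endcode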
1758*/
1759void
1760__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1761{
1762 void **data_ptr;
1763
1764 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1765
1766 KMP_MB();
1767
1768 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1769
1770 if ( __kmp_env_consistency_check ) {
1771 if ( loc == 0 ) {
1772 KMP_WARNING( ConstructIdentInvalid );
1773 }
1774 }
1775
1776 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1777
1778 if (didit) *data_ptr = cpy_data;
1779
1780 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001781#if USE_ITT_NOTIFY
1782 __kmp_threads[gtid]->th.th_ident = loc;
1783#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001784 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1785
1786 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1787
1788 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1789 /* Nesting checks are already handled by the single construct checks */
1790
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001791#if USE_ITT_NOTIFY
1792 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1793#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001794 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1795}
1796
1797/* -------------------------------------------------------------------------- */
1798
1799#define INIT_LOCK __kmp_init_user_lock_with_checks
1800#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1801#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1802#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1803#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1804#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1805#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1806#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1807#define TEST_LOCK __kmp_test_user_lock_with_checks
1808#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1809#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1810#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1811
1812
1813/*
1814 * TODO: Make check abort messages use location info & pass it
1815 * into with_checks routines
1816 */
1817
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001818#if KMP_USE_DYNAMIC_LOCK
1819
1820// internal lock initializer
1821static __forceinline void
1822__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1823{
1824 if (KMP_IS_D_LOCK(seq)) {
1825 KMP_INIT_D_LOCK(lock, seq);
1826#if USE_ITT_BUILD
1827 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
1828#endif
1829 } else {
1830 KMP_INIT_I_LOCK(lock, seq);
1831#if USE_ITT_BUILD
1832 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1833 __kmp_itt_lock_creating(ilk->lock, loc);
1834#endif
1835 }
1836}
1837
1838// internal nest lock initializer
1839static __forceinline void
1840__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1841{
1842#if KMP_USE_TSX
1843 // Don't have nested lock implementation for speculative locks
1844 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
1845 seq = __kmp_user_lock_seq;
1846#endif
1847 switch (seq) {
1848 case lockseq_tas:
1849 seq = lockseq_nested_tas;
1850 break;
1851#if KMP_USE_FUTEX
1852 case lockseq_futex:
1853 seq = lockseq_nested_futex;
1854 break;
1855#endif
1856 case lockseq_ticket:
1857 seq = lockseq_nested_ticket;
1858 break;
1859 case lockseq_queuing:
1860 seq = lockseq_nested_queuing;
1861 break;
1862 case lockseq_drdpa:
1863 seq = lockseq_nested_drdpa;
1864 break;
1865 default:
1866 seq = lockseq_nested_queuing;
1867 }
1868 KMP_INIT_I_LOCK(lock, seq);
1869#if USE_ITT_BUILD
1870 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1871 __kmp_itt_lock_creating(ilk->lock, loc);
1872#endif
1873}
1874
1875/* initialize the lock with a hint */
1876void
1877__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1878{
1879 KMP_DEBUG_ASSERT(__kmp_init_serial);
1880 if (__kmp_env_consistency_check && user_lock == NULL) {
1881 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
1882 }
1883
1884 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1885}
1886
1887/* initialize the nested lock with a hint */
1888void
1889__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1890{
1891 KMP_DEBUG_ASSERT(__kmp_init_serial);
1892 if (__kmp_env_consistency_check && user_lock == NULL) {
1893 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
1894 }
1895
1896 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1897}
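/*
 * For illustration (a sketch; the omp_lock_hint_* constants are assumed to be
 * the lock-hint values declared in omp.h):
 *
 *   omp_init_lock_with_hint( &l, omp_lock_hint_speculative );
 *
 * is expected to reach
 *
 *   __kmpc_init_lock_with_hint( &foo_loc, gtid, (void **)&l, omp_lock_hint_speculative );
 */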
1898
1899#endif // KMP_USE_DYNAMIC_LOCK
1900
Jim Cownie5e8470a2013-09-27 10:38:44 +00001901/* initialize the lock */
1902void
1903__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001904#if KMP_USE_DYNAMIC_LOCK
1905 KMP_DEBUG_ASSERT(__kmp_init_serial);
1906 if (__kmp_env_consistency_check && user_lock == NULL) {
1907 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1908 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001909 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001910
1911#else // KMP_USE_DYNAMIC_LOCK
1912
Jim Cownie5e8470a2013-09-27 10:38:44 +00001913 static char const * const func = "omp_init_lock";
1914 kmp_user_lock_p lck;
1915 KMP_DEBUG_ASSERT( __kmp_init_serial );
1916
1917 if ( __kmp_env_consistency_check ) {
1918 if ( user_lock == NULL ) {
1919 KMP_FATAL( LockIsUninitialized, func );
1920 }
1921 }
1922
1923 KMP_CHECK_USER_LOCK_INIT();
1924
1925 if ( ( __kmp_user_lock_kind == lk_tas )
1926 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1927 lck = (kmp_user_lock_p)user_lock;
1928 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001929#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001930 else if ( ( __kmp_user_lock_kind == lk_futex )
1931 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1932 lck = (kmp_user_lock_p)user_lock;
1933 }
1934#endif
1935 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001936 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001937 }
1938 INIT_LOCK( lck );
1939 __kmp_set_user_lock_location( lck, loc );
1940
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001941#if OMPT_SUPPORT && OMPT_TRACE
1942 if (ompt_enabled &&
1943 ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
1944 ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
1945 }
1946#endif
1947
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948#if USE_ITT_BUILD
1949 __kmp_itt_lock_creating( lck );
1950#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001951
1952#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001953} // __kmpc_init_lock
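/*
 * For illustration, a compiler typically lowers the user-level lock API onto
 * these entry points (a sketch only; foo_loc and gtid are assumed to be in
 * scope and l is the user's omp_lock_t):
 *
 *   __kmpc_init_lock( &foo_loc, gtid, (void **)&l );    // omp_init_lock( &l )
 *   __kmpc_set_lock( &foo_loc, gtid, (void **)&l );     // omp_set_lock( &l )
 *   __kmpc_unset_lock( &foo_loc, gtid, (void **)&l );   // omp_unset_lock( &l )
 *   __kmpc_destroy_lock( &foo_loc, gtid, (void **)&l ); // omp_destroy_lock( &l )
 */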
1954
1955/* initialize the nested lock */
1956void
1957__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001958#if KMP_USE_DYNAMIC_LOCK
1959
1960 KMP_DEBUG_ASSERT(__kmp_init_serial);
1961 if (__kmp_env_consistency_check && user_lock == NULL) {
1962 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1963 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001964 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001965
1966#else // KMP_USE_DYNAMIC_LOCK
1967
Jim Cownie5e8470a2013-09-27 10:38:44 +00001968 static char const * const func = "omp_init_nest_lock";
1969 kmp_user_lock_p lck;
1970 KMP_DEBUG_ASSERT( __kmp_init_serial );
1971
1972 if ( __kmp_env_consistency_check ) {
1973 if ( user_lock == NULL ) {
1974 KMP_FATAL( LockIsUninitialized, func );
1975 }
1976 }
1977
1978 KMP_CHECK_USER_LOCK_INIT();
1979
1980 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1981 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1982 lck = (kmp_user_lock_p)user_lock;
1983 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001984#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985 else if ( ( __kmp_user_lock_kind == lk_futex )
1986 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1987 <= OMP_NEST_LOCK_T_SIZE ) ) {
1988 lck = (kmp_user_lock_p)user_lock;
1989 }
1990#endif
1991 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001992 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001993 }
1994
1995 INIT_NESTED_LOCK( lck );
1996 __kmp_set_user_lock_location( lck, loc );
1997
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001998#if OMPT_SUPPORT && OMPT_TRACE
1999 if (ompt_enabled &&
2000 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
2001 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
2002 }
2003#endif
2004
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005#if USE_ITT_BUILD
2006 __kmp_itt_lock_creating( lck );
2007#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002008
2009#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010} // __kmpc_init_nest_lock
2011
2012void
2013__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002014#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002016# if USE_ITT_BUILD
2017 kmp_user_lock_p lck;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002018 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2019 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002020 } else {
2021 lck = (kmp_user_lock_p)user_lock;
2022 }
2023 __kmp_itt_lock_destroyed(lck);
2024# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002025 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002026#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027 kmp_user_lock_p lck;
2028
2029 if ( ( __kmp_user_lock_kind == lk_tas )
2030 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2031 lck = (kmp_user_lock_p)user_lock;
2032 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002033#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002034 else if ( ( __kmp_user_lock_kind == lk_futex )
2035 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2036 lck = (kmp_user_lock_p)user_lock;
2037 }
2038#endif
2039 else {
2040 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
2041 }
2042
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002043#if OMPT_SUPPORT && OMPT_TRACE
2044 if (ompt_enabled &&
2045 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
2046 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
2047 }
2048#endif
2049
Jim Cownie5e8470a2013-09-27 10:38:44 +00002050#if USE_ITT_BUILD
2051 __kmp_itt_lock_destroyed( lck );
2052#endif /* USE_ITT_BUILD */
2053 DESTROY_LOCK( lck );
2054
2055 if ( ( __kmp_user_lock_kind == lk_tas )
2056 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2057 ;
2058 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002059#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060 else if ( ( __kmp_user_lock_kind == lk_futex )
2061 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2062 ;
2063 }
2064#endif
2065 else {
2066 __kmp_user_lock_free( user_lock, gtid, lck );
2067 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002068#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002069} // __kmpc_destroy_lock
2070
2071/* destroy the nested lock */
2072void
2073__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002074#if KMP_USE_DYNAMIC_LOCK
2075
2076# if USE_ITT_BUILD
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002077 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002078 __kmp_itt_lock_destroyed(ilk->lock);
2079# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002080 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002081
2082#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002083
2084 kmp_user_lock_p lck;
2085
2086 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2087 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2088 lck = (kmp_user_lock_p)user_lock;
2089 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002090#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091 else if ( ( __kmp_user_lock_kind == lk_futex )
2092 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2093 <= OMP_NEST_LOCK_T_SIZE ) ) {
2094 lck = (kmp_user_lock_p)user_lock;
2095 }
2096#endif
2097 else {
2098 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
2099 }
2100
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002101#if OMPT_SUPPORT && OMPT_TRACE
2102 if (ompt_enabled &&
2103 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
2104 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
2105 }
2106#endif
2107
Jim Cownie5e8470a2013-09-27 10:38:44 +00002108#if USE_ITT_BUILD
2109 __kmp_itt_lock_destroyed( lck );
2110#endif /* USE_ITT_BUILD */
2111
2112 DESTROY_NESTED_LOCK( lck );
2113
2114 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2115 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2116 ;
2117 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002118#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002119 else if ( ( __kmp_user_lock_kind == lk_futex )
2120 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2121 <= OMP_NEST_LOCK_T_SIZE ) ) {
2122 ;
2123 }
2124#endif
2125 else {
2126 __kmp_user_lock_free( user_lock, gtid, lck );
2127 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002128#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002129} // __kmpc_destroy_nest_lock
2130
2131void
2132__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002133 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002134#if KMP_USE_DYNAMIC_LOCK
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002135 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002136# if USE_ITT_BUILD
2137 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
2138# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002139# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002140 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002141 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002142 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002143# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002144 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002145 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002146 } else
2147# endif
2148 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002149 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002150 }
2151# if USE_ITT_BUILD
2152 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2153# endif
2154
2155#else // KMP_USE_DYNAMIC_LOCK
2156
Jim Cownie5e8470a2013-09-27 10:38:44 +00002157 kmp_user_lock_p lck;
2158
2159 if ( ( __kmp_user_lock_kind == lk_tas )
2160 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2161 lck = (kmp_user_lock_p)user_lock;
2162 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002163#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002164 else if ( ( __kmp_user_lock_kind == lk_futex )
2165 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2166 lck = (kmp_user_lock_p)user_lock;
2167 }
2168#endif
2169 else {
2170 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
2171 }
2172
2173#if USE_ITT_BUILD
2174 __kmp_itt_lock_acquiring( lck );
2175#endif /* USE_ITT_BUILD */
2176
2177 ACQUIRE_LOCK( lck, gtid );
2178
2179#if USE_ITT_BUILD
2180 __kmp_itt_lock_acquired( lck );
2181#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002182
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002183#if OMPT_SUPPORT && OMPT_TRACE
2184 if (ompt_enabled &&
2185 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
2186 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
2187 }
2188#endif
2189
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002190#endif // KMP_USE_DYNAMIC_LOCK
2191}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002192
2193void
2194__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002195#if KMP_USE_DYNAMIC_LOCK
2196
2197# if USE_ITT_BUILD
2198 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2199# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002200 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002201# if USE_ITT_BUILD
2202 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2203#endif
2204
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002205#if OMPT_SUPPORT && OMPT_TRACE
2206 if (ompt_enabled) {
2207 // missing support here: need to know whether acquired first or not
2208 }
2209#endif
2210
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002211#else // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002212 int acquire_status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002213 kmp_user_lock_p lck;
2214
2215 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2216 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2217 lck = (kmp_user_lock_p)user_lock;
2218 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002219#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002220 else if ( ( __kmp_user_lock_kind == lk_futex )
2221 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2222 <= OMP_NEST_LOCK_T_SIZE ) ) {
2223 lck = (kmp_user_lock_p)user_lock;
2224 }
2225#endif
2226 else {
2227 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2228 }
2229
2230#if USE_ITT_BUILD
2231 __kmp_itt_lock_acquiring( lck );
2232#endif /* USE_ITT_BUILD */
2233
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002234 ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002235
2236#if USE_ITT_BUILD
2237 __kmp_itt_lock_acquired( lck );
2238#endif /* USE_ITT_BUILD */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002239
2240#if OMPT_SUPPORT && OMPT_TRACE
2241 if (ompt_enabled) {
2242 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2243 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
2244 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
2245 } else {
2246 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
2247 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
2248 }
2249 }
2250#endif
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002251
2252#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002253}
2254
2255void
2256__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2257{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002258#if KMP_USE_DYNAMIC_LOCK
2259
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002260 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002261# if USE_ITT_BUILD
2262 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2263# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002264# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002265 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002266 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002267 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002268# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002269 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002270 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002271 } else
2272# endif
2273 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002274 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002275 }
2276
2277#else // KMP_USE_DYNAMIC_LOCK
2278
Jim Cownie5e8470a2013-09-27 10:38:44 +00002279 kmp_user_lock_p lck;
2280
2281 /* Can't use serial interval since not block structured */
2282 /* release the lock */
2283
2284 if ( ( __kmp_user_lock_kind == lk_tas )
2285 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002286#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002287 // "fast" path implemented to fix customer performance issue
2288#if USE_ITT_BUILD
2289 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2290#endif /* USE_ITT_BUILD */
2291 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2292 KMP_MB();
2293 return;
2294#else
2295 lck = (kmp_user_lock_p)user_lock;
2296#endif
2297 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002298#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002299 else if ( ( __kmp_user_lock_kind == lk_futex )
2300 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2301 lck = (kmp_user_lock_p)user_lock;
2302 }
2303#endif
2304 else {
2305 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2306 }
2307
2308#if USE_ITT_BUILD
2309 __kmp_itt_lock_releasing( lck );
2310#endif /* USE_ITT_BUILD */
2311
2312 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002313
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002314#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002315 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002316 ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2317 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2318 }
2319#endif
2320
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002321#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002322}
2323
2324/* release the nested lock */
2325void
2326__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2327{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002328#if KMP_USE_DYNAMIC_LOCK
2329
2330# if USE_ITT_BUILD
2331 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2332# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002333 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002334
2335#else // KMP_USE_DYNAMIC_LOCK
2336
Jim Cownie5e8470a2013-09-27 10:38:44 +00002337 kmp_user_lock_p lck;
2338
2339 /* Can't use serial interval since not block structured */
2340
2341 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2342 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002343#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002344 // "fast" path implemented to fix customer performance issue
2345 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2346#if USE_ITT_BUILD
2347 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2348#endif /* USE_ITT_BUILD */
2349 if ( --(tl->lk.depth_locked) == 0 ) {
2350 TCW_4(tl->lk.poll, 0);
2351 }
2352 KMP_MB();
2353 return;
2354#else
2355 lck = (kmp_user_lock_p)user_lock;
2356#endif
2357 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002358#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002359 else if ( ( __kmp_user_lock_kind == lk_futex )
2360 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2361 <= OMP_NEST_LOCK_T_SIZE ) ) {
2362 lck = (kmp_user_lock_p)user_lock;
2363 }
2364#endif
2365 else {
2366 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2367 }
2368
2369#if USE_ITT_BUILD
2370 __kmp_itt_lock_releasing( lck );
2371#endif /* USE_ITT_BUILD */
2372
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002373 int release_status;
2374 release_status = RELEASE_NESTED_LOCK( lck, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002375#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002376 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002377 if (release_status == KMP_LOCK_RELEASED) {
2378 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2379 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2380 (uint64_t) lck);
2381 }
2382 } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2383 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2384 (uint64_t) lck);
2385 }
2386 }
2387#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002388
2389#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002390}
2391
2392/* try to acquire the lock */
2393int
2394__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2395{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002396 KMP_COUNT_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002397
2398#if KMP_USE_DYNAMIC_LOCK
2399 int rc;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002400 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002401# if USE_ITT_BUILD
Jonathan Peyton81f9cd12015-05-22 22:37:22 +00002402 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002403# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002404# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002405 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002406 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002407 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002408# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002409 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002410 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002411 } else
2412# endif
2413 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002414 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002415 }
2416 if (rc) {
2417# if USE_ITT_BUILD
2418 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2419# endif
2420 return FTN_TRUE;
2421 } else {
2422# if USE_ITT_BUILD
2423 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2424# endif
2425 return FTN_FALSE;
2426 }
2427
2428#else // KMP_USE_DYNAMIC_LOCK
2429
Jim Cownie5e8470a2013-09-27 10:38:44 +00002430 kmp_user_lock_p lck;
2431 int rc;
2432
2433 if ( ( __kmp_user_lock_kind == lk_tas )
2434 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2435 lck = (kmp_user_lock_p)user_lock;
2436 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002437#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002438 else if ( ( __kmp_user_lock_kind == lk_futex )
2439 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2440 lck = (kmp_user_lock_p)user_lock;
2441 }
2442#endif
2443 else {
2444 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2445 }
2446
2447#if USE_ITT_BUILD
2448 __kmp_itt_lock_acquiring( lck );
2449#endif /* USE_ITT_BUILD */
2450
2451 rc = TEST_LOCK( lck, gtid );
2452#if USE_ITT_BUILD
2453 if ( rc ) {
2454 __kmp_itt_lock_acquired( lck );
2455 } else {
2456 __kmp_itt_lock_cancelled( lck );
2457 }
2458#endif /* USE_ITT_BUILD */
2459 return ( rc ? FTN_TRUE : FTN_FALSE );
2460
2461 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002462
2463#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002464}
2465
2466/* try to acquire the nested lock */
2467int
2468__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2469{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002470#if KMP_USE_DYNAMIC_LOCK
2471 int rc;
2472# if USE_ITT_BUILD
2473 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2474# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002475 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002476# if USE_ITT_BUILD
2477 if (rc) {
2478 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2479 } else {
2480 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2481 }
2482# endif
2483 return rc;
2484
2485#else // KMP_USE_DYNAMIC_LOCK
2486
Jim Cownie5e8470a2013-09-27 10:38:44 +00002487 kmp_user_lock_p lck;
2488 int rc;
2489
2490 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2491 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2492 lck = (kmp_user_lock_p)user_lock;
2493 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002494#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002495 else if ( ( __kmp_user_lock_kind == lk_futex )
2496 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2497 <= OMP_NEST_LOCK_T_SIZE ) ) {
2498 lck = (kmp_user_lock_p)user_lock;
2499 }
2500#endif
2501 else {
2502 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2503 }
2504
2505#if USE_ITT_BUILD
2506 __kmp_itt_lock_acquiring( lck );
2507#endif /* USE_ITT_BUILD */
2508
2509 rc = TEST_NESTED_LOCK( lck, gtid );
2510#if USE_ITT_BUILD
2511 if ( rc ) {
2512 __kmp_itt_lock_acquired( lck );
2513 } else {
2514 __kmp_itt_lock_cancelled( lck );
2515 }
2516#endif /* USE_ITT_BUILD */
2517 return rc;
2518
2519 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002520
2521#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002522}
2523
2524
2525/*--------------------------------------------------------------------------------------------------------------------*/
2526
2527/*
2528 * Interface to fast scalable reduce methods routines
2529 */
2530
2531// keep the selected method in a thread-local structure for cross-function usage: it will be used in the __kmpc_end_reduce* functions;
2532// an alternative solution: re-determine the method once more in the __kmpc_end_reduce* functions (a new prototype would be required then)
2533// AT: which solution is better?
2534#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2535 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2536
2537#define __KMP_GET_REDUCTION_METHOD(gtid) \
2538 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2539
2540// description of the packed_reduction_method variable: look at the macros in kmp.h
2541
2542
2543// used in a critical section reduce block
2544static __forceinline void
2545__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2546
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002547 // this lock was visible to a customer and to the threading profile tool as a serial overhead span
Jim Cownie5e8470a2013-09-27 10:38:44 +00002548 // (although it's used for an internal purpose only)
 2549 // why was it visible in the previous implementation?
 2550 // should we keep it visible in the new reduce block?
2551 kmp_user_lock_p lck;
2552
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002553#if KMP_USE_DYNAMIC_LOCK
2554
Jonathan Peytondae13d82015-12-11 21:57:06 +00002555 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
2556 // Check if it is initialized.
2557 if (*lk == 0) {
2558 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
2559 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
2560 } else {
2561 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002562 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002563 }
2564 // Branch for accessing the actual lock object and set operation. This branching is inevitable since
2565 // this lock initialization does not follow the normal dispatch path (lock table is not used).
2566 if (KMP_EXTRACT_D_TAG(lk) != 0) {
2567 lck = (kmp_user_lock_p)lk;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002568 KMP_DEBUG_ASSERT(lck != NULL);
2569 if (__kmp_env_consistency_check) {
2570 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2571 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002572 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002573 } else {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002574 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
2575 lck = ilk->lock;
2576 KMP_DEBUG_ASSERT(lck != NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002577 if (__kmp_env_consistency_check) {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002578 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002579 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002580 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002581 }
2582
2583#else // KMP_USE_DYNAMIC_LOCK
2584
Jim Cownie5e8470a2013-09-27 10:38:44 +00002585 // We know that the fast reduction code is only emitted by Intel compilers
2586 // with 32 byte critical sections. If there isn't enough space, then we
2587 // have to use a pointer.
2588 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2589 lck = (kmp_user_lock_p)crit;
2590 }
2591 else {
2592 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2593 }
2594 KMP_DEBUG_ASSERT( lck != NULL );
2595
2596 if ( __kmp_env_consistency_check )
2597 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2598
2599 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002600
2601#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002602}
2603
2604// used in a critical section reduce block
2605static __forceinline void
2606__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2607
2608 kmp_user_lock_p lck;
2609
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002610#if KMP_USE_DYNAMIC_LOCK
2611
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002612 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002613 lck = (kmp_user_lock_p)crit;
2614 if (__kmp_env_consistency_check)
2615 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002616 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002617 } else {
2618 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2619 if (__kmp_env_consistency_check)
2620 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002621 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002622 }
2623
2624#else // KMP_USE_DYNAMIC_LOCK
2625
Jim Cownie5e8470a2013-09-27 10:38:44 +00002626 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2627 // sections. If there isn't enough space, then we have to use a pointer.
2628 if ( __kmp_base_user_lock_size > 32 ) {
2629 lck = *( (kmp_user_lock_p *) crit );
2630 KMP_ASSERT( lck != NULL );
2631 } else {
2632 lck = (kmp_user_lock_p) crit;
2633 }
2634
2635 if ( __kmp_env_consistency_check )
2636 __kmp_pop_sync( global_tid, ct_critical, loc );
2637
2638 __kmp_release_user_lock_with_checks( lck, global_tid );
2639
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002640#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002641} // __kmp_end_critical_section_reduce_block
2642
2643
2644/* 2.a.i. Reduce Block without a terminating barrier */
2645/*!
2646@ingroup SYNCHRONIZATION
2647@param loc source location information
2648@param global_tid global thread number
2649@param num_vars number of items (variables) to be reduced
2650@param reduce_size size of data in bytes to be reduced
2651@param reduce_data pointer to data to be reduced
2652@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2653@param lck pointer to the unique lock data structure
2654@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2655
2656The nowait version is used for a reduce clause with the nowait argument.
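For illustration, a compiler might consume the result as follows (a sketch
only; <tt>foo_loc</tt>, <tt>gtid</tt> and the reduction operands are assumed
to be in scope):
@code
switch ( __kmpc_reduce_nowait( &foo_loc, gtid, n, size, data, reduce_func, &crit ) ) {
case 1: /* this thread holds the combined result */
    /* ... finalize the reduction ... */
    __kmpc_end_reduce_nowait( &foo_loc, gtid, &crit );
    break;
case 2: /* every thread contributes using atomic operations */
    /* ... atomic updates of the reduction variables ... */
    break;
default: /* 0: nothing to do for this thread */
    break;
}
@endcode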
2657*/
2658kmp_int32
2659__kmpc_reduce_nowait(
2660 ident_t *loc, kmp_int32 global_tid,
2661 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2662 kmp_critical_name *lck ) {
2663
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002664 KMP_COUNT_BLOCK(REDUCE_nowait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002665 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002666 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002667#if OMP_40_ENABLED
2668 kmp_team_t *team;
2669 kmp_info_t *th;
2670 int teams_swapped = 0, task_state;
2671#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002672 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2673
2674 // why do we need this initialization here at all?
 2675 // The reduction clause cannot be used as a stand-alone directive.
 2676 // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
2677 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2678 // possible detection of false-positive race by the threadchecker ???
2679 if( ! TCR_4( __kmp_init_parallel ) )
2680 __kmp_parallel_initialize();
2681
2682 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002683#if KMP_USE_DYNAMIC_LOCK
2684 if ( __kmp_env_consistency_check )
2685 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2686#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002687 if ( __kmp_env_consistency_check )
2688 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002689#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002690
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002691#if OMP_40_ENABLED
2692 th = __kmp_thread_from_gtid(global_tid);
2693 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2694 team = th->th.th_team;
2695 if( team->t.t_level == th->th.th_teams_level ) {
2696 // this is reduction at teams construct
2697 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2698 // Let's swap teams temporarily for the reduction barrier
2699 teams_swapped = 1;
2700 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2701 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002702 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002703 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002704 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002705 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002706 }
2707 }
2708#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002709
 2710 // the packed_reduction_method value will be reused by the __kmpc_end_reduce* functions, so the value should be kept in a variable
 2711 // the variable should be either a construct-specific or thread-specific property, not a team-specific property
 2712 // (a thread can reach the next reduce block on the next construct, and the reduce method may differ on the next construct)
 2713 // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
 2714 // (if both construct-specific and team-specific variables were shared, then unnecessary extra syncs would be needed)
 2715 // a thread-specific variable is better regarding the two issues above (next construct and extra syncs)
 2716 // a thread-specific "th_local.reduction_method" variable is used currently
 2717 // each thread executes the 'determine' and 'set' lines (no need for a single thread to do it, which avoids unnecessary extra syncs)
2718
2719 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2720 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2721
2722 if( packed_reduction_method == critical_reduce_block ) {
2723
2724 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2725 retval = 1;
2726
2727 } else if( packed_reduction_method == empty_reduce_block ) {
2728
2729 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2730 retval = 1;
2731
2732 } else if( packed_reduction_method == atomic_reduce_block ) {
2733
2734 retval = 2;
2735
2736 // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2737 // (it's not quite good, because the checking block has been closed by this 'pop',
 2738 // but the atomic operation has not been executed yet; it will be executed slightly later, literally on the next instruction)
2739 if ( __kmp_env_consistency_check )
2740 __kmp_pop_sync( global_tid, ct_reduce, loc );
2741
2742 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2743
 2744 //AT: performance issue: a real barrier here
 2745 //AT: (if the master goes slow, other threads are blocked here waiting for the master to come and release them)
 2746 //AT: (it's not what a customer might expect when specifying the NOWAIT clause)
 2747 //AT: (specifying NOWAIT won't improve performance; it'll just be confusing to a customer)
 2748 //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
 2749 // and be more in line with the sense of NOWAIT
 2750 //AT: TO DO: run the EPCC test and compare times
2751
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002752 // this barrier should be invisible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002753 // (it's neither a terminating barrier nor customer's code; it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002754#if USE_ITT_NOTIFY
2755 __kmp_threads[global_tid]->th.th_ident = loc;
2756#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002757 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2758 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2759
2760 // all other workers except master should do this pop here
2761 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
2762 if ( __kmp_env_consistency_check ) {
2763 if( retval == 0 ) {
2764 __kmp_pop_sync( global_tid, ct_reduce, loc );
2765 }
2766 }
2767
2768 } else {
2769
2770 // should never reach this block
2771 KMP_ASSERT( 0 ); // "unexpected method"
2772
2773 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002774#if OMP_40_ENABLED
2775 if( teams_swapped ) {
2776 // Restore thread structure
2777 th->th.th_info.ds.ds_tid = 0;
2778 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002779 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002780 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002781 th->th.th_task_state = task_state;
2782 }
2783#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002784 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2785
2786 return retval;
2787}
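
// A sketch of the calling convention, for orientation only (variable names are
// hypothetical and the exact lowering is compiler-specific). For a reduction on
// a nowait construct, generated code is expected to look roughly like:
//
//   kmp_critical_name lck = { 0 };
//   int ret = __kmpc_reduce_nowait( loc, gtid, 1, sizeof(sum), &sum_priv,
//                                   reduce_func, &lck );
//   switch ( ret ) {
//       case 1:  // this thread combines the private copies directly
//           sum += sum_priv;
//           __kmpc_end_reduce_nowait( loc, gtid, &lck );
//           break;
//       case 2:  // each thread updates the shared result atomically,
//                // e.g. an atomic add of sum_priv into sum
//           break;
//       default: // 0: nothing to do, a tree reduction already consumed the data
//           break;
//   }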

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void
__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {

    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );

    packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_end_critical_section_reduce_block( loc, global_tid, lck );

    } else if( packed_reduction_method == empty_reduce_block ) {

        // usage: if team size == 1, no synchronization is required ( on Intel platforms only )

    } else if( packed_reduction_method == atomic_reduce_block ) {

        // neither master nor other workers should get here
        // (code gen does not generate this call in case 2: atomic reduce block)
        // actually it would be better to remove this branch entirely;
        // after removal this value would be caught by the 'else' and would assert

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        // only master gets here

    } else {

        // should never reach this block
        KMP_ASSERT( 0 ); // "unexpected method"

    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_reduce, loc );

    KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );

    return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32
__kmpc_reduce(
    ident_t *loc, kmp_int32 global_tid,
    kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck )
{
    KMP_COUNT_BLOCK(REDUCE_wait);
    int retval = 0;
    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );

    // why do we need this initialization here at all?
    // a reduction clause cannot be a stand-alone directive.

    // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
    // possible detection of a false-positive race by the threadchecker ???
    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    // check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
#else
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
#endif

    packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
    __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
        retval = 1;

    } else if( packed_reduction_method == empty_reduce_block ) {

        // usage: if team size == 1, no synchronization is required ( Intel platforms only )
        retval = 1;

    } else if( packed_reduction_method == atomic_reduce_block ) {

        retval = 2;

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        //case tree_reduce_block:
        // this barrier should be visible to a customer and to the threading profile tool
        // (it's a terminating barrier on constructs if NOWAIT not specified)
#if USE_ITT_NOTIFY
        __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
#endif
        retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
        retval = ( retval != 0 ) ? ( 0 ) : ( 1 );

        // all workers except the master should do this pop here
        // (no worker other than the master will enter __kmpc_end_reduce())
        if ( __kmp_env_consistency_check ) {
            if( retval == 0 ) { // 0: all other workers; 1: master
                __kmp_pop_sync( global_tid, ct_reduce, loc );
            }
        }

    } else {

        // should never reach this block
        KMP_ASSERT( 0 ); // "unexpected method"

    }

    KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );

    return retval;
}
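
// For orientation, a hedged sketch of the expected call sequence for the blocking
// form (names hypothetical, exact lowering is compiler-specific). Unlike the
// nowait form, __kmpc_end_reduce() appears to be expected on the atomic path as
// well, since its atomic branch supplies the construct's terminating barrier:
//
//   int ret = __kmpc_reduce( loc, gtid, 1, sizeof(sum), &sum_priv, reduce_func, &lck );
//   if ( ret == 1 ) {                        // this thread combines directly
//       sum += sum_priv;
//       __kmpc_end_reduce( loc, gtid, &lck );
//   } else if ( ret == 2 ) {                 // atomic path
//       /* atomic add of sum_priv into sum */
//       __kmpc_end_reduce( loc, gtid, &lck );
//   }                                        // ret == 0: tree reduction; workers
//                                            // are released by the master's end call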

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
*/
void
__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {

    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );

    packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );

    // this barrier should be visible to a customer and to the threading profile tool
    // (it's a terminating barrier on constructs if NOWAIT not specified)

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_end_critical_section_reduce_block( loc, global_tid, lck );

        // TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
        __kmp_threads[global_tid]->th.th_ident = loc;
#endif
        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( packed_reduction_method == empty_reduce_block ) {

        // usage: if team size == 1, no synchronization is required ( Intel platforms only )

        // TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
        __kmp_threads[global_tid]->th.th_ident = loc;
#endif
        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( packed_reduction_method == atomic_reduce_block ) {

        // TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
        __kmp_threads[global_tid]->th.th_ident = loc;
#endif
        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        // only master executes here (master releases all other workers)
        __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );

    } else {

        // should never reach this block
        KMP_ASSERT( 0 ); // "unexpected method"

    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_reduce, loc );

    KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );

    return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/

kmp_uint64
__kmpc_get_taskid() {

    kmp_int32 gtid;
    kmp_info_t * thread;

    gtid = __kmp_get_gtid();
    if ( gtid < 0 ) {
        return 0;
    } // if
    thread = __kmp_thread_from_gtid( gtid );
    return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid


kmp_uint64
__kmpc_get_parent_taskid() {

    kmp_int32 gtid;
    kmp_info_t * thread;
    kmp_taskdata_t * parent_task;

    gtid = __kmp_get_gtid();
    if ( gtid < 0 ) {
        return 0;
    } // if
    thread = __kmp_thread_from_gtid( gtid );
    parent_task = thread->th.th_current_task->td_parent;
    return ( parent_task == NULL ? 0 : parent_task->td_task_id );

} // __kmpc_get_parent_taskid
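
// Usage sketch (hypothetical, for illustration only): these entry points can be
// used by tools or debug code to correlate trace records with OpenMP tasks, e.g.
//
//   printf( "task %llu (parent %llu)\n",
//           (unsigned long long) __kmpc_get_taskid(),
//           (unsigned long long) __kmpc_get_parent_taskid() );
//
// both return 0 when called from a thread unknown to the runtime.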

// Set the thread placement filter (cf. the KMP_PLACE_THREADS machinery):
// nS/nC/nT select how many sockets, cores per socket, and threads per core to
// use, while sO/cO skip the first sO sockets and the first cO cores.
void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
{
    if ( ! __kmp_init_serial ) {
        __kmp_serial_initialize();
    }
    __kmp_place_num_sockets = nS;
    __kmp_place_socket_offset = sO;
    __kmp_place_num_cores = nC;
    __kmp_place_core_offset = cO;
    __kmp_place_num_threads_per_core = nT;
}
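
// Illustrative mapping (an assumption about the intended use, not verified here):
// a setting equivalent to KMP_PLACE_THREADS=2s,4c,2t would correspond to
//
//   __kmpc_place_threads( /*nS=*/2, /*sO=*/0, /*nC=*/4, /*cO=*/0, /*nT=*/2 );
//
// i.e. use 2 sockets, 4 cores per socket, and 2 threads per core, with no offsets.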

// end of file //