/*
 * kmp_csupport.c -- kfront linkage support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "omp.h"        /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_error.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512
30
31/* ------------------------------------------------------------------------ */
32/* ------------------------------------------------------------------------ */
33
34/* flags will be used in future, e.g., to implement */
35/* openmp_strict library restrictions */
36
37/*!
38 * @ingroup STARTUP_SHUTDOWN
39 * @param loc in source location information
40 * @param flags in for future use (currently ignored)
41 *
42 * Initialize the runtime library. This call is optional; if it is not made then
Jim Cownie4cc4bb42014-10-07 16:25:50 +000043 * it will be implicitly called by attempts to use other library functions.
Jim Cownie5e8470a2013-09-27 10:38:44 +000044 *
45 */
46void
47__kmpc_begin(ident_t *loc, kmp_int32 flags)
48{
49 // By default __kmp_ignore_mppbeg() returns TRUE.
50 if (__kmp_ignore_mppbeg() == FALSE) {
51 __kmp_internal_begin();
52
53 KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
54 }
55}
56
57/*!
58 * @ingroup STARTUP_SHUTDOWN
59 * @param loc source location information
60 *
61 * Shutdown the runtime library. This is also optional, and even if called will not
62 * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
63 */
64void
65__kmpc_end(ident_t *loc)
66{
    // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end() call a no-op.
68 // However, this can be overridden with KMP_IGNORE_MPPEND environment variable.
69 // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
70 // will unregister this root (it can cause library shut down).
71 if (__kmp_ignore_mppend() == FALSE) {
72 KC_TRACE( 10, ("__kmpc_end: called\n" ) );
73 KA_TRACE( 30, ("__kmpc_end\n" ));
74
75 __kmp_internal_end_thread( -1 );
76 }
77}
78
79/*!
80@ingroup THREAD_STATES
81@param loc Source location information.
82@return The global thread index of the active thread.
83
84This function can be called in any context.
85
If the runtime has only been entered at the outermost level from a
87single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
Jonathan Peyton81f9cd12015-05-22 22:37:22 +000088which would be returned by omp_get_thread_num() in the outermost
Jim Cownie5e8470a2013-09-27 10:38:44 +000089active parallel construct. (Or zero if there is no active parallel
90construct, since the master thread is necessarily thread zero).
91
92If multiple non-OpenMP threads all enter an OpenMP construct then this
93will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
95OpenMP thread ids returned by omp_get_thread_num()).
96
97*/
98kmp_int32
99__kmpc_global_thread_num(ident_t *loc)
100{
101 kmp_int32 gtid = __kmp_entry_gtid();
102
103 KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
104
105 return gtid;
106}
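
/*
   Illustrative sketch (editor-added example, not part of the runtime): how the
   global thread index differs from the OpenMP thread number. The identifiers
   worker_main and loc_stub are hypothetical; a compiler would normally pass a
   real ident_t describing the call site.

       static ident_t loc_stub;    // assumed zero-initialized placeholder location

       void worker_main(void)      // called from a foreign (non-OpenMP) thread
       {
           kmp_int32 gtid = __kmpc_global_thread_num(&loc_stub);
           // gtid is unique across all runtime-managed threads, whereas
           // omp_get_thread_num() would report 0 outside any parallel region.
       }
*/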
107
108/*!
109@ingroup THREAD_STATES
110@param loc Source location information.
111@return The number of threads under control of the OpenMP<sup>*</sup> runtime
112
113This function can be called in any context.
114It returns the total number of threads under the control of the OpenMP runtime. That is
115not a number that can be determined by any OpenMP standard calls, since the library may be
116called from more than one non-OpenMP thread, and this reflects the total over all such calls.
Similarly, because the runtime keeps underlying threads alive even when they are not active
(the cost of creating and destroying OS threads is high), this call counts all such threads
even if they are not currently waiting for work.
120*/
121kmp_int32
122__kmpc_global_num_threads(ident_t *loc)
123{
124 KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
125
126 return TCR_4(__kmp_nth);
127}
128
129/*!
130@ingroup THREAD_STATES
131@param loc Source location information.
132@return The thread number of the calling thread in the innermost active parallel construct.
133
134*/
135kmp_int32
136__kmpc_bound_thread_num(ident_t *loc)
137{
138 KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
139 return __kmp_tid_from_gtid( __kmp_entry_gtid() );
140}
141
142/*!
143@ingroup THREAD_STATES
144@param loc Source location information.
145@return The number of threads in the innermost active parallel construct.
146*/
147kmp_int32
148__kmpc_bound_num_threads(ident_t *loc)
149{
150 KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
151
152 return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
153}
154
155/*!
156 * @ingroup DEPRECATED
157 * @param loc location description
158 *
159 * This function need not be called. It always returns TRUE.
160 */
161kmp_int32
162__kmpc_ok_to_fork(ident_t *loc)
163{
164#ifndef KMP_DEBUG
165
166 return TRUE;
167
168#else
169
170 const char *semi2;
171 const char *semi3;
172 int line_no;
173
174 if (__kmp_par_range == 0) {
175 return TRUE;
176 }
177 semi2 = loc->psource;
178 if (semi2 == NULL) {
179 return TRUE;
180 }
181 semi2 = strchr(semi2, ';');
182 if (semi2 == NULL) {
183 return TRUE;
184 }
185 semi2 = strchr(semi2 + 1, ';');
186 if (semi2 == NULL) {
187 return TRUE;
188 }
189 if (__kmp_par_range_filename[0]) {
190 const char *name = semi2 - 1;
191 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
192 name--;
193 }
194 if ((*name == '/') || (*name == ';')) {
195 name++;
196 }
197 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
198 return __kmp_par_range < 0;
199 }
200 }
201 semi3 = strchr(semi2 + 1, ';');
202 if (__kmp_par_range_routine[0]) {
203 if ((semi3 != NULL) && (semi3 > semi2)
204 && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
205 return __kmp_par_range < 0;
206 }
207 }
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000208 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000209 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
210 return __kmp_par_range > 0;
211 }
212 return __kmp_par_range < 0;
213 }
214 return TRUE;
215
216#endif /* KMP_DEBUG */
217
218}
219
220/*!
221@ingroup THREAD_STATES
222@param loc Source location information.
223@return 1 if this thread is executing inside an active parallel region, zero if not.
224*/
225kmp_int32
226__kmpc_in_parallel( ident_t *loc )
227{
228 return __kmp_entry_thread() -> th.th_root -> r.r_active;
229}
230
231/*!
232@ingroup PARALLEL
233@param loc source location information
234@param global_tid global thread number
235@param num_threads number of threads requested for this parallel construct
236
237Set the number of threads to be used by the next fork spawned by this thread.
238This call is only required if the parallel construct has a `num_threads` clause.
239*/
240void
241__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
242{
243 KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
244 global_tid, num_threads ) );
245
246 __kmp_push_num_threads( loc, global_tid, num_threads );
247}
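
/*
   Illustrative sketch (editor-added, not part of the runtime): the call sequence a
   compiler might emit for

       #pragma omp parallel num_threads(4)
       { body(); }

   The outlined routine name and loc_stub are hypothetical placeholders; the
   microtask signature follows the kmpc_micro convention (global and bound
   thread-id pointers first, then the shared-variable pointers).

       static void outlined_par(kmp_int32 *gtid, kmp_int32 *btid)
       {
           body();
       }

       void lowered_region(void)
       {
           kmp_int32 gtid = __kmpc_global_thread_num(&loc_stub);
           __kmpc_push_num_threads(&loc_stub, gtid, 4);              // num_threads clause
           __kmpc_fork_call(&loc_stub, 0, (kmpc_micro)outlined_par); // no shared args
       }
*/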
248
249void
250__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
251{
252 KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
253
254 /* the num_threads are automatically popped */
255}
256
257
258#if OMP_40_ENABLED
259
260void
261__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
262{
263 KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
264 global_tid, proc_bind ) );
265
266 __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
267}
268
269#endif /* OMP_40_ENABLED */
270
271
272/*!
273@ingroup PARALLEL
274@param loc source location information
275@param argc total number of arguments in the ellipsis
276@param microtask pointer to callback routine consisting of outlined parallel construct
277@param ... pointers to shared variables that aren't global
278
279Do the actual fork and call the microtask in the relevant number of threads.
280*/
281void
282__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
283{
284 int gtid = __kmp_entry_gtid();
Jonathan Peyton45be4502015-08-11 21:36:41 +0000285
Jonathan Peyton61118492016-05-20 19:03:38 +0000286#if (KMP_STATS_ENABLED)
Jonathan Peyton45be4502015-08-11 21:36:41 +0000287 int inParallel = __kmpc_in_parallel(loc);
288 if (inParallel)
289 {
290 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
291 }
292 else
293 {
Jonathan Peyton45be4502015-08-11 21:36:41 +0000294 KMP_COUNT_BLOCK(OMP_PARALLEL);
295 }
296#endif
297
Jim Cownie5e8470a2013-09-27 10:38:44 +0000298 // maybe to save thr_state is enough here
299 {
300 va_list ap;
301 va_start( ap, microtask );
302
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000303#if OMPT_SUPPORT
Jonas Hahnfelddd9a05d2016-09-14 13:59:31 +0000304 ompt_frame_t* ompt_frame;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000305 if (ompt_enabled) {
Jonas Hahnfelddd9a05d2016-09-14 13:59:31 +0000306 kmp_info_t *master_th = __kmp_threads[ gtid ];
307 kmp_team_t *parent_team = master_th->th.th_team;
308 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
309 if (lwt)
310 ompt_frame = &(lwt->ompt_task_info.frame);
311 else
312 {
313 int tid = __kmp_tid_from_gtid( gtid );
314 ompt_frame = &(parent_team->t.t_implicit_task_taskdata[tid].
315 ompt_task_info.frame);
316 }
317 ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000318 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000319#endif
320
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000321#if INCLUDE_SSC_MARKS
322 SSC_MARK_FORKING();
323#endif
324 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000325 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000326#if OMPT_SUPPORT
327 VOLATILE_CAST(void *) microtask, // "unwrapped" task
328#endif
329 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000330 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
331/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000332#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000333 &ap
334#else
335 ap
336#endif
337 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000338#if INCLUDE_SSC_MARKS
339 SSC_MARK_JOINING();
340#endif
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +0000341 __kmp_join_call( loc, gtid
342#if OMPT_SUPPORT
343 , fork_context_intel
344#endif
345 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000346
347 va_end( ap );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000348
Jim Cownie5e8470a2013-09-27 10:38:44 +0000349 }
350}
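
/*
   Illustrative sketch (editor-added, not part of the runtime): how a parallel region
   with one shared variable might be lowered onto __kmpc_fork_call. Names such as
   outlined_sum and loc_stub are hypothetical; argc tells the runtime how many
   shared-variable pointers follow in the ellipsis, and the same pointers are passed
   on to the microtask after the two thread-id arguments.

       static void outlined_sum(kmp_int32 *gtid, kmp_int32 *btid, int *n)
       {
           // parallel region body, sharing *n
       }

       void lowered_region(int n)
       {
           __kmpc_fork_call(&loc_stub, 1, (kmpc_micro)outlined_sum, &n);
       }
*/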
351
352#if OMP_40_ENABLED
353/*!
354@ingroup PARALLEL
355@param loc source location information
356@param global_tid global thread number
357@param num_teams number of teams requested for the teams construct
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000358@param num_threads number of threads per team requested for the teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +0000359
360Set the number of teams to be used by the teams construct.
361This call is only required if the teams construct has a `num_teams` clause
362or a `thread_limit` clause (or both).
363*/
364void
365__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
366{
367 KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
368 global_tid, num_teams, num_threads ) );
369
370 __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
371}
372
373/*!
374@ingroup PARALLEL
375@param loc source location information
376@param argc total number of arguments in the ellipsis
377@param microtask pointer to callback routine consisting of outlined teams construct
378@param ... pointers to shared variables that aren't global
379
380Do the actual fork and call the microtask in the relevant number of threads.
381*/
382void
383__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
384{
385 int gtid = __kmp_entry_gtid();
386 kmp_info_t *this_thr = __kmp_threads[ gtid ];
387 va_list ap;
388 va_start( ap, microtask );
389
Jonathan Peyton45be4502015-08-11 21:36:41 +0000390 KMP_COUNT_BLOCK(OMP_TEAMS);
391
Jim Cownie5e8470a2013-09-27 10:38:44 +0000392 // remember teams entry point and nesting level
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000393 this_thr->th.th_teams_microtask = microtask;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000394 this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
395
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000396#if OMPT_SUPPORT
397 kmp_team_t *parent_team = this_thr->th.th_team;
398 int tid = __kmp_tid_from_gtid( gtid );
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000399 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000400 parent_team->t.t_implicit_task_taskdata[tid].
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +0000401 ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1);
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000402 }
403#endif
404
Jim Cownie5e8470a2013-09-27 10:38:44 +0000405 // check if __kmpc_push_num_teams called, set default number of teams otherwise
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000406 if ( this_thr->th.th_teams_size.nteams == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000407 __kmp_push_num_teams( loc, gtid, 0, 0 );
408 }
409 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000410 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
411 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000412
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000413 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000414 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000415#if OMPT_SUPPORT
416 VOLATILE_CAST(void *) microtask, // "unwrapped" task
417#endif
418 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000419 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000420#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000421 &ap
422#else
423 ap
424#endif
425 );
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +0000426 __kmp_join_call( loc, gtid
427#if OMPT_SUPPORT
428 , fork_context_intel
429#endif
430 );
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000431
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000432 this_thr->th.th_teams_microtask = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000433 this_thr->th.th_teams_level = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000434 *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000435 va_end( ap );
436}
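
/*
   Illustrative sketch (editor-added, not part of the runtime): a possible lowering of

       #pragma omp teams num_teams(2) thread_limit(8)
       { teams_body(); }

   Names are hypothetical placeholders. The push call is only needed when a
   num_teams or thread_limit clause is present; otherwise __kmpc_fork_teams picks
   defaults via __kmp_push_num_teams(loc, gtid, 0, 0).

       static void outlined_teams(kmp_int32 *gtid, kmp_int32 *btid)
       {
           teams_body();
       }

       void lowered_teams(void)
       {
           kmp_int32 gtid = __kmpc_global_thread_num(&loc_stub);
           __kmpc_push_num_teams(&loc_stub, gtid, 2, 8);
           __kmpc_fork_teams(&loc_stub, 0, (kmpc_micro)outlined_teams);
       }
*/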
437#endif /* OMP_40_ENABLED */
438
439
440//
441// I don't think this function should ever have been exported.
442// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
443// openmp code ever called it, but it's been exported from the RTL for so
444// long that I'm afraid to remove the definition.
445//
446int
447__kmpc_invoke_task_func( int gtid )
448{
449 return __kmp_invoke_task_func( gtid );
450}
451
452/*!
453@ingroup PARALLEL
454@param loc source location information
455@param global_tid global thread number
456
457Enter a serialized parallel construct. This interface is used to handle a
458conditional parallel region, like this,
459@code
460#pragma omp parallel if (condition)
461@endcode
462when the condition is false.
463*/
464void
465__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
466{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000467 __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
468 * kmp_fork_call since the tasks to be done are similar in each case.
469 */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000470}
471
472/*!
473@ingroup PARALLEL
474@param loc source location information
475@param global_tid global thread number
476
477Leave a serialized parallel construct.
478*/
479void
480__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
481{
482 kmp_internal_control_t *top;
483 kmp_info_t *this_thr;
484 kmp_team_t *serial_team;
485
486 KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
487
488 /* skip all this code for autopar serialized loops since it results in
489 unacceptable overhead */
490 if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
491 return;
492
493 // Not autopar code
494 if( ! TCR_4( __kmp_init_parallel ) )
495 __kmp_parallel_initialize();
496
497 this_thr = __kmp_threads[ global_tid ];
498 serial_team = this_thr->th.th_serial_team;
499
Jonathan Peytondf6818b2016-06-14 17:57:47 +0000500 #if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000501 kmp_task_team_t * task_team = this_thr->th.th_task_team;
502
503 // we need to wait for the proxy tasks before finishing the thread
504 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
Jonathan Peyton7abf9d52016-05-26 18:19:10 +0000505 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL) ); // is an ITT object needed here?
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000506 #endif
507
Jim Cownie5e8470a2013-09-27 10:38:44 +0000508 KMP_MB();
509 KMP_DEBUG_ASSERT( serial_team );
510 KMP_ASSERT( serial_team -> t.t_serialized );
511 KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
512 KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
513 KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
514 KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
515
516 /* If necessary, pop the internal control stack values and replace the team values */
517 top = serial_team -> t.t_control_stack_top;
518 if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000519 copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000520 serial_team -> t.t_control_stack_top = top -> next;
521 __kmp_free(top);
522 }
523
Jim Cownie5e8470a2013-09-27 10:38:44 +0000524 //if( serial_team -> t.t_serialized > 1 )
525 serial_team -> t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000526
527 /* pop dispatch buffers stack */
528 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
529 {
530 dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
531 serial_team->t.t_dispatch->th_disp_buffer =
532 serial_team->t.t_dispatch->th_disp_buffer->next;
533 __kmp_free( disp_buffer );
534 }
535
536 -- serial_team -> t.t_serialized;
537 if ( serial_team -> t.t_serialized == 0 ) {
538
539 /* return to the parallel section */
540
541#if KMP_ARCH_X86 || KMP_ARCH_X86_64
542 if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
543 __kmp_clear_x87_fpu_status_word();
544 __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
545 __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
546 }
547#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
548
549 this_thr -> th.th_team = serial_team -> t.t_parent;
550 this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
551
552 /* restore values cached in the thread */
553 this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
554 this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
555 this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
556
557 /* TODO the below shouldn't need to be adjusted for serialized teams */
558 this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
559 t.t_dispatch[ serial_team -> t.t_master_tid ];
560
Jim Cownie5e8470a2013-09-27 10:38:44 +0000561 __kmp_pop_current_task_from_thread( this_thr );
562
563 KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
564 this_thr -> th.th_current_task -> td_flags.executing = 1;
565
566 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +0000567 // Copy the task team from the new child / old parent team to the thread.
568 this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000569 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
570 global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
571 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000572 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000573 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
574 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
575 global_tid, serial_team, serial_team -> t.t_serialized ) );
576 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000577 }
578
Jim Cownie5e8470a2013-09-27 10:38:44 +0000579 if ( __kmp_env_consistency_check )
580 __kmp_pop_parallel( global_tid, NULL );
581}
582
583/*!
584@ingroup SYNCHRONIZATION
585@param loc source location information.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000586
Andrey Churbanov723a6b62015-02-20 18:09:27 +0000587Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
Jim Cownie5e8470a2013-09-27 10:38:44 +0000588depending on the memory ordering convention obeyed by the compiler
589even that may not be necessary).
590*/
591void
Andrey Churbanov723a6b62015-02-20 18:09:27 +0000592__kmpc_flush(ident_t *loc)
Jim Cownie5e8470a2013-09-27 10:38:44 +0000593{
594 KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
595
    /* need explicit __mf() here since the library uses volatile instead */
597 KMP_MB(); /* Flush all pending memory write invalidates. */
598
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000599 #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
600 #if KMP_MIC
601 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
602 // We shouldn't need it, though, since the ABI rules require that
603 // * If the compiler generates NGO stores it also generates the fence
604 // * If users hand-code NGO stores they should insert the fence
605 // therefore no incomplete unordered stores should be visible.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000606 #else
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000607 // C74404
608 // This is to address non-temporal store instructions (sfence needed).
        // The clflush instruction is also addressed (mfence needed).
        // Probably the non-temporal load movntdqa instruction should also be addressed.
611 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
612 if ( ! __kmp_cpuinfo.initialized ) {
613 __kmp_query_cpuid( & __kmp_cpuinfo );
614 }; // if
615 if ( ! __kmp_cpuinfo.sse2 ) {
616 // CPU cannot execute SSE2 instructions.
617 } else {
Jonathan Peyton61118492016-05-20 19:03:38 +0000618 #if KMP_COMPILER_ICC
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000619 _mm_mfence();
Jonathan Peytonb7d30cb2016-03-23 16:27:25 +0000620 #elif KMP_COMPILER_MSVC
621 MemoryBarrier();
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000622 #else
623 __sync_synchronize();
624 #endif // KMP_COMPILER_ICC
625 }; // if
626 #endif // KMP_MIC
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000627 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        // Nothing to see here; move along.
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000629 #elif KMP_ARCH_PPC64
630 // Nothing needed here (we have a real MB above).
631 #if KMP_OS_CNK
632 // The flushing thread needs to yield here; this prevents a
633 // busy-waiting thread from saturating the pipeline. flush is
634 // often used in loops like this:
635 // while (!flag) {
636 // #pragma omp flush(flag)
637 // }
638 // and adding the yield here is good for at least a 10x speedup
639 // when running >2 threads per core (on the NAS LU benchmark).
640 __kmp_yield(TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000641 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000642 #else
643 #error Unknown or unsupported architecture
644 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000645
646}
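
/*
   Illustrative sketch (editor-added, not part of the runtime): a bare

       #pragma omp flush

   is lowered to a single runtime call; loc_stub stands in for the compiler-emitted
   source-location descriptor.

       __kmpc_flush(&loc_stub);
*/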
647
648/* -------------------------------------------------------------------------- */
649
650/* -------------------------------------------------------------------------- */
651
652/*!
653@ingroup SYNCHRONIZATION
654@param loc source location information
655@param global_tid thread id.
656
657Execute a barrier.
658*/
659void
660__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
661{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000662 KMP_COUNT_BLOCK(OMP_BARRIER);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000663 KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
664
665 if (! TCR_4(__kmp_init_parallel))
666 __kmp_parallel_initialize();
667
668 if ( __kmp_env_consistency_check ) {
669 if ( loc == 0 ) {
670 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
671 }; // if
672
673 __kmp_check_barrier( global_tid, ct_barrier, loc );
674 }
675
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +0000676#if OMPT_SUPPORT && OMPT_TRACE
677 ompt_frame_t * ompt_frame;
678 if (ompt_enabled ) {
Jonas Hahnfeld848d6902016-09-14 13:59:39 +0000679 ompt_frame = __ompt_get_task_frame_internal(0);
680 if ( ompt_frame->reenter_runtime_frame == NULL )
681 ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +0000682 }
683#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000684 __kmp_threads[ global_tid ]->th.th_ident = loc;
685 // TODO: explicit barrier_wait_id:
686 // this function is called when 'barrier' directive is present or
687 // implicit barrier at the end of a worksharing construct.
688 // 1) better to add a per-thread barrier counter to a thread data structure
689 // 2) set to 0 when a new team is created
690 // 4) no sync is required
691
692 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +0000693#if OMPT_SUPPORT && OMPT_TRACE
694 if (ompt_enabled ) {
695 ompt_frame->reenter_runtime_frame = NULL;
696 }
697#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000698}
699
700/* The BARRIER for a MASTER section is always explicit */
701/*!
702@ingroup WORK_SHARING
703@param loc source location information.
704@param global_tid global thread number .
705@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
706*/
707kmp_int32
708__kmpc_master(ident_t *loc, kmp_int32 global_tid)
709{
710 int status = 0;
711
712 KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
713
714 if( ! TCR_4( __kmp_init_parallel ) )
715 __kmp_parallel_initialize();
716
Jonathan Peyton45be4502015-08-11 21:36:41 +0000717 if( KMP_MASTER_GTID( global_tid )) {
Jonathan Peyton30138252016-03-03 21:21:05 +0000718 KMP_COUNT_BLOCK(OMP_MASTER);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +0000719 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000720 status = 1;
Jonathan Peyton45be4502015-08-11 21:36:41 +0000721 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000722
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000723#if OMPT_SUPPORT && OMPT_TRACE
724 if (status) {
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000725 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000726 ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
Jonathan Peyton122dd762015-07-13 18:55:45 +0000727 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
728 kmp_team_t *team = this_thr -> th.th_team;
729
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000730 int tid = __kmp_tid_from_gtid( global_tid );
731 ompt_callbacks.ompt_callback(ompt_event_master_begin)(
732 team->t.ompt_team_info.parallel_id,
733 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
734 }
735 }
736#endif
737
Jim Cownie5e8470a2013-09-27 10:38:44 +0000738 if ( __kmp_env_consistency_check ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000739#if KMP_USE_DYNAMIC_LOCK
740 if (status)
741 __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
742 else
743 __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
744#else
Jim Cownie5e8470a2013-09-27 10:38:44 +0000745 if (status)
746 __kmp_push_sync( global_tid, ct_master, loc, NULL );
747 else
748 __kmp_check_sync( global_tid, ct_master, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000749#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000750 }
751
752 return status;
753}
754
755/*!
756@ingroup WORK_SHARING
757@param loc source location information.
758@param global_tid global thread number .
759
760Mark the end of a <tt>master</tt> region. This should only be called by the thread
761that executes the <tt>master</tt> region.
762*/
763void
764__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
765{
766 KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
767
768 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
Jonathan Peyton11dc82f2016-05-05 16:15:57 +0000769 KMP_POP_PARTITIONED_TIMER();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000770
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000771#if OMPT_SUPPORT && OMPT_TRACE
772 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
773 kmp_team_t *team = this_thr -> th.th_team;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000774 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000775 ompt_callbacks.ompt_callback(ompt_event_master_end)) {
776 int tid = __kmp_tid_from_gtid( global_tid );
777 ompt_callbacks.ompt_callback(ompt_event_master_end)(
778 team->t.ompt_team_info.parallel_id,
779 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
780 }
781#endif
782
Jim Cownie5e8470a2013-09-27 10:38:44 +0000783 if ( __kmp_env_consistency_check ) {
784 if( global_tid < 0 )
785 KMP_WARNING( ThreadIdentInvalid );
786
787 if( KMP_MASTER_GTID( global_tid ))
788 __kmp_pop_sync( global_tid, ct_master, loc );
789 }
790}
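
/*
   Illustrative sketch (editor-added, not part of the runtime): the usual pairing of
   the two calls when lowering

       #pragma omp master
       { master_body(); }

   gtid is the caller's global thread number (e.g. from __kmpc_global_thread_num),
   and only the thread for which __kmpc_master() returned 1 calls __kmpc_end_master().

       if (__kmpc_master(&loc_stub, gtid)) {
           master_body();
           __kmpc_end_master(&loc_stub, gtid);
       }
*/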
791
792/*!
793@ingroup WORK_SHARING
794@param loc source location information.
795@param gtid global thread number.
796
797Start execution of an <tt>ordered</tt> construct.
798*/
799void
800__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
801{
802 int cid = 0;
803 kmp_info_t *th;
804 KMP_DEBUG_ASSERT( __kmp_init_serial );
805
806 KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
807
808 if (! TCR_4(__kmp_init_parallel))
809 __kmp_parallel_initialize();
810
811#if USE_ITT_BUILD
812 __kmp_itt_ordered_prep( gtid );
813 // TODO: ordered_wait_id
814#endif /* USE_ITT_BUILD */
815
816 th = __kmp_threads[ gtid ];
817
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000818#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000819 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000820 /* OMPT state update */
821 th->th.ompt_thread_info.wait_id = (uint64_t) loc;
822 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
823
824 /* OMPT event callback */
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000825 if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000826 ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
827 th->th.ompt_thread_info.wait_id);
828 }
829 }
830#endif
831
Jim Cownie5e8470a2013-09-27 10:38:44 +0000832 if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
833 (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
834 else
835 __kmp_parallel_deo( & gtid, & cid, loc );
836
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000837#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000838 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000839 /* OMPT state update */
840 th->th.ompt_thread_info.state = ompt_state_work_parallel;
841 th->th.ompt_thread_info.wait_id = 0;
842
843 /* OMPT event callback */
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000844 if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000845 ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
846 th->th.ompt_thread_info.wait_id);
847 }
848 }
849#endif
850
Jim Cownie5e8470a2013-09-27 10:38:44 +0000851#if USE_ITT_BUILD
852 __kmp_itt_ordered_start( gtid );
853#endif /* USE_ITT_BUILD */
854}
855
856/*!
857@ingroup WORK_SHARING
858@param loc source location information.
859@param gtid global thread number.
860
861End execution of an <tt>ordered</tt> construct.
862*/
863void
864__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
865{
866 int cid = 0;
867 kmp_info_t *th;
868
869 KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
870
871#if USE_ITT_BUILD
872 __kmp_itt_ordered_end( gtid );
873 // TODO: ordered_wait_id
874#endif /* USE_ITT_BUILD */
875
876 th = __kmp_threads[ gtid ];
877
878 if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
879 (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
880 else
881 __kmp_parallel_dxo( & gtid, & cid, loc );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000882
883#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000884 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000885 ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
886 ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
887 th->th.ompt_thread_info.wait_id);
888 }
889#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000890}
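
/*
   Illustrative sketch (editor-added, not part of the runtime): inside the body of a
   loop worksharing construct with an ordered clause, each ordered region is
   bracketed by these two calls; the dispatch machinery set up for the loop decides
   when the calling iteration may proceed. loc_stub and ordered_body are placeholders.

       // per iteration obtained from the loop dispatcher:
       __kmpc_ordered(&loc_stub, gtid);
       ordered_body();
       __kmpc_end_ordered(&loc_stub, gtid);
*/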
891
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000892#if KMP_USE_DYNAMIC_LOCK
893
Jonathan Peytondae13d82015-12-11 21:57:06 +0000894static __forceinline void
895__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000896{
Jonathan Peytondae13d82015-12-11 21:57:06 +0000897 // Pointer to the allocated indirect lock is written to crit, while indexing is ignored.
898 void *idx;
899 kmp_indirect_lock_t **lck;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000900 lck = (kmp_indirect_lock_t **)crit;
Jonathan Peytondae13d82015-12-11 21:57:06 +0000901 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
902 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
903 KMP_SET_I_LOCK_LOCATION(ilk, loc);
904 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
905 KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000906#if USE_ITT_BUILD
Jonathan Peytondae13d82015-12-11 21:57:06 +0000907 __kmp_itt_critical_creating(ilk->lock, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000908#endif
Jonathan Peytondae13d82015-12-11 21:57:06 +0000909 int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
910 if (status == 0) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000911#if USE_ITT_BUILD
Jonathan Peytondae13d82015-12-11 21:57:06 +0000912 __kmp_itt_critical_destroyed(ilk->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000913#endif
Jonathan Peytondae13d82015-12-11 21:57:06 +0000914 // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit.
915 //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000916 }
Jonathan Peytondae13d82015-12-11 21:57:06 +0000917 KMP_DEBUG_ASSERT(*lck != NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000918}
919
920// Fast-path acquire tas lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000921#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000922 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000923 if (l->lk.poll != KMP_LOCK_FREE(tas) || \
924 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000925 kmp_uint32 spins; \
926 KMP_FSYNC_PREPARE(l); \
927 KMP_INIT_YIELD(spins); \
928 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
929 KMP_YIELD(TRUE); \
930 } else { \
931 KMP_YIELD_SPIN(spins); \
932 } \
Jonathan Peyton377aa402016-04-14 16:00:37 +0000933 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000934 while (l->lk.poll != KMP_LOCK_FREE(tas) || \
935 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
Jonathan Peyton377aa402016-04-14 16:00:37 +0000936 __kmp_spin_backoff(&backoff); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000937 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
938 KMP_YIELD(TRUE); \
939 } else { \
940 KMP_YIELD_SPIN(spins); \
941 } \
942 } \
943 } \
944 KMP_FSYNC_ACQUIRED(l); \
945}
946
947// Fast-path test tas lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000948#define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000949 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000950 rc = l->lk.poll == KMP_LOCK_FREE(tas) && \
951 KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000952}
953
954// Fast-path release tas lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000955#define KMP_RELEASE_TAS_LOCK(lock, gtid) { \
956 TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000957 KMP_MB(); \
958}
959
Jonathan Peytondae13d82015-12-11 21:57:06 +0000960#if KMP_USE_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000961
962# include <unistd.h>
963# include <sys/syscall.h>
964# ifndef FUTEX_WAIT
965# define FUTEX_WAIT 0
966# endif
967# ifndef FUTEX_WAKE
968# define FUTEX_WAKE 1
969# endif
970
971// Fast-path acquire futex lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000972#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000973 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
974 kmp_int32 gtid_code = (gtid+1) << 1; \
975 KMP_MB(); \
976 KMP_FSYNC_PREPARE(ftx); \
977 kmp_int32 poll_val; \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000978 while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
979 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
980 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000981 if (!cond) { \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000982 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000983 continue; \
984 } \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000985 poll_val |= KMP_LOCK_BUSY(1, futex); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000986 } \
987 kmp_int32 rc; \
988 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
989 continue; \
990 } \
991 gtid_code |= 1; \
992 } \
993 KMP_FSYNC_ACQUIRED(ftx); \
994}
995
996// Fast-path test futex lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000997#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000998 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
Jonathan Peytoneeec4c82016-06-22 16:36:07 +0000999 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1 << 1, futex))) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001000 KMP_FSYNC_ACQUIRED(ftx); \
1001 rc = TRUE; \
1002 } else { \
1003 rc = FALSE; \
1004 } \
1005}
1006
1007// Fast-path release futex lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001008#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001009 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1010 KMP_MB(); \
1011 KMP_FSYNC_RELEASING(ftx); \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001012 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1013 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1014 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001015 } \
1016 KMP_MB(); \
1017 KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1018}
1019
Jonathan Peytondae13d82015-12-11 21:57:06 +00001020#endif // KMP_USE_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001021
1022#else // KMP_USE_DYNAMIC_LOCK
1023
Jim Cownie5e8470a2013-09-27 10:38:44 +00001024static kmp_user_lock_p
1025__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
1026{
1027 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1028
1029 //
1030 // Because of the double-check, the following load
1031 // doesn't need to be volatile.
1032 //
1033 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1034
1035 if ( lck == NULL ) {
1036 void * idx;
1037
1038 // Allocate & initialize the lock.
1039 // Remember allocated locks in table in order to free them in __kmp_cleanup()
1040 lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
1041 __kmp_init_user_lock_with_checks( lck );
1042 __kmp_set_user_lock_location( lck, loc );
1043#if USE_ITT_BUILD
1044 __kmp_itt_critical_creating( lck );
1045 // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
        // lock. It is the only place where we can guarantee it. There is a chance the lock will be
        // destroyed with no usage, but that is not a problem, because this is not a real event seen
        // by the user but rather sets the name for the object (lock). See more details in kmp_itt.h.
1049#endif /* USE_ITT_BUILD */
1050
1051 //
1052 // Use a cmpxchg instruction to slam the start of the critical
1053 // section with the lock pointer. If another thread beat us
1054 // to it, deallocate the lock, and use the lock that the other
1055 // thread allocated.
1056 //
1057 int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
1058
1059 if ( status == 0 ) {
1060 // Deallocate the lock and reload the value.
1061#if USE_ITT_BUILD
1062 __kmp_itt_critical_destroyed( lck );
1063 // Let ITT know the lock is destroyed and the same memory location may be reused for
1064 // another purpose.
1065#endif /* USE_ITT_BUILD */
1066 __kmp_destroy_user_lock_with_checks( lck );
1067 __kmp_user_lock_free( &idx, gtid, lck );
1068 lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1069 KMP_DEBUG_ASSERT( lck != NULL );
1070 }
1071 }
1072 return lck;
1073}
1074
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001075#endif // KMP_USE_DYNAMIC_LOCK
1076
Jim Cownie5e8470a2013-09-27 10:38:44 +00001077/*!
1078@ingroup WORK_SHARING
1079@param loc source location information.
1080@param global_tid global thread number .
1081@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
1082some other suitably unique value.
1083
1084Enter code protected by a `critical` construct.
1085This function blocks until the executing thread can enter the critical section.
1086*/
1087void
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001088__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
1089{
1090#if KMP_USE_DYNAMIC_LOCK
1091 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1092#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001093 KMP_COUNT_BLOCK(OMP_CRITICAL);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001094 KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001095 kmp_user_lock_p lck;
1096
1097 KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1098
1099 //TODO: add THR_OVHD_STATE
1100
1101 KMP_CHECK_USER_LOCK_INIT();
1102
1103 if ( ( __kmp_user_lock_kind == lk_tas )
1104 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1105 lck = (kmp_user_lock_p)crit;
1106 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001107#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001108 else if ( ( __kmp_user_lock_kind == lk_futex )
1109 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1110 lck = (kmp_user_lock_p)crit;
1111 }
1112#endif
1113 else { // ticket, queuing or drdpa
1114 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1115 }
1116
1117 if ( __kmp_env_consistency_check )
1118 __kmp_push_sync( global_tid, ct_critical, loc, lck );
1119
1120 /* since the critical directive binds to all threads, not just
1121 * the current team we have to check this even if we are in a
1122 * serialized team */
1123 /* also, even if we are the uber thread, we still have to conduct the lock,
1124 * as we have to contend with sibling threads */
1125
1126#if USE_ITT_BUILD
1127 __kmp_itt_critical_acquiring( lck );
1128#endif /* USE_ITT_BUILD */
1129 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001130 __kmp_acquire_user_lock_with_checks( lck, global_tid );
1131
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001132#if USE_ITT_BUILD
1133 __kmp_itt_critical_acquired( lck );
1134#endif /* USE_ITT_BUILD */
1135
Jonathan Peyton93a879c2016-03-21 18:32:26 +00001136 KMP_START_EXPLICIT_TIMER(OMP_critical);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001137 KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001138#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001139}
1140
1141#if KMP_USE_DYNAMIC_LOCK
1142
1143// Converts the given hint to an internal lock implementation
1144static __forceinline kmp_dyna_lockseq_t
1145__kmp_map_hint_to_lock(uintptr_t hint)
1146{
1147#if KMP_USE_TSX
1148# define KMP_TSX_LOCK(seq) lockseq_##seq
1149#else
1150# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1151#endif
Hal Finkel01bb2402016-03-27 13:24:09 +00001152
1153#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1154# define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1155#else
1156# define KMP_CPUINFO_RTM 0
1157#endif
1158
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001159 // Hints that do not require further logic
1160 if (hint & kmp_lock_hint_hle)
1161 return KMP_TSX_LOCK(hle);
1162 if (hint & kmp_lock_hint_rtm)
Hal Finkel01bb2402016-03-27 13:24:09 +00001163 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001164 if (hint & kmp_lock_hint_adaptive)
Hal Finkel01bb2402016-03-27 13:24:09 +00001165 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001166
1167 // Rule out conflicting hints first by returning the default lock
1168 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1169 return __kmp_user_lock_seq;
1170 if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
1171 return __kmp_user_lock_seq;
1172
1173 // Do not even consider speculation when it appears to be contended
1174 if (hint & omp_lock_hint_contended)
1175 return lockseq_queuing;
1176
1177 // Uncontended lock without speculation
1178 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1179 return lockseq_tas;
1180
1181 // HLE lock for speculation
1182 if (hint & omp_lock_hint_speculative)
1183 return KMP_TSX_LOCK(hle);
1184
1185 return __kmp_user_lock_seq;
1186}
1187
1188/*!
1189@ingroup WORK_SHARING
1190@param loc source location information.
1191@param global_tid global thread number.
1192@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section,
1193or some other suitably unique value.
1194@param hint the lock hint.
1195
1196Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation.
1197This function blocks until the executing thread can enter the critical section unless the hint suggests use of
1198speculative execution and the hardware supports it.
1199*/
1200void
1201__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
1202{
1203 KMP_COUNT_BLOCK(OMP_CRITICAL);
1204 kmp_user_lock_p lck;
1205
1206 KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1207
1208 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1209 // Check if it is initialized.
1210 if (*lk == 0) {
1211 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1212 if (KMP_IS_D_LOCK(lckseq)) {
1213 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
1214 } else {
1215 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1216 }
1217 }
1218 // Branch for accessing the actual lock object and set operation. This branching is inevitable since
1219 // this lock initialization does not follow the normal dispatch path (lock table is not used).
1220 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1221 lck = (kmp_user_lock_p)lk;
1222 if (__kmp_env_consistency_check) {
1223 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
1224 }
1225# if USE_ITT_BUILD
1226 __kmp_itt_critical_acquiring(lck);
1227# endif
1228# if KMP_USE_INLINED_TAS
1229 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1230 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1231 } else
1232# elif KMP_USE_INLINED_FUTEX
1233 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1234 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1235 } else
1236# endif
1237 {
1238 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1239 }
1240 } else {
1241 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1242 lck = ilk->lock;
1243 if (__kmp_env_consistency_check) {
1244 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
1245 }
1246# if USE_ITT_BUILD
1247 __kmp_itt_critical_acquiring(lck);
1248# endif
1249 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1250 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001251
Jim Cownie5e8470a2013-09-27 10:38:44 +00001252#if USE_ITT_BUILD
1253 __kmp_itt_critical_acquired( lck );
1254#endif /* USE_ITT_BUILD */
1255
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001256 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001257 KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001258} // __kmpc_critical_with_hint
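
/*
   Illustrative sketch (editor-added, not part of the runtime): OpenMP 4.5's
   critical-with-hint is lowered like a plain named critical, with the hint value
   (a combination of the omp_lock_hint_* / kmp_lock_hint_* bits interpreted by
   __kmp_map_hint_to_lock above) passed through on entry only. Names are placeholders.

       static kmp_critical_name crit_spec;

       __kmpc_critical_with_hint(&loc_stub, gtid, &crit_spec, omp_lock_hint_speculative);
       critical_body();
       __kmpc_end_critical(&loc_stub, gtid, &crit_spec);
*/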
1259
1260#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001261
1262/*!
1263@ingroup WORK_SHARING
1264@param loc source location information.
1265@param global_tid global thread number .
1266@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
1267some other suitably unique value.
1268
1269Leave a critical section, releasing any lock that was held during its execution.
1270*/
1271void
1272__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
1273{
1274 kmp_user_lock_p lck;
1275
1276 KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
1277
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001278#if KMP_USE_DYNAMIC_LOCK
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001279 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001280 lck = (kmp_user_lock_p)crit;
1281 KMP_ASSERT(lck != NULL);
1282 if (__kmp_env_consistency_check) {
1283 __kmp_pop_sync(global_tid, ct_critical, loc);
1284 }
1285# if USE_ITT_BUILD
1286 __kmp_itt_critical_releasing( lck );
1287# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00001288# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001289 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001290 KMP_RELEASE_TAS_LOCK(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001291 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00001292# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001293 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001294 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001295 } else
1296# endif
1297 {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001298 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001299 }
1300 } else {
1301 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1302 KMP_ASSERT(ilk != NULL);
1303 lck = ilk->lock;
1304 if (__kmp_env_consistency_check) {
1305 __kmp_pop_sync(global_tid, ct_critical, loc);
1306 }
1307# if USE_ITT_BUILD
1308 __kmp_itt_critical_releasing( lck );
1309# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001310 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001311 }
1312
1313#else // KMP_USE_DYNAMIC_LOCK
1314
Jim Cownie5e8470a2013-09-27 10:38:44 +00001315 if ( ( __kmp_user_lock_kind == lk_tas )
1316 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1317 lck = (kmp_user_lock_p)crit;
1318 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001319#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001320 else if ( ( __kmp_user_lock_kind == lk_futex )
1321 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1322 lck = (kmp_user_lock_p)crit;
1323 }
1324#endif
1325 else { // ticket, queuing or drdpa
1326 lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
1327 }
1328
1329 KMP_ASSERT(lck != NULL);
1330
1331 if ( __kmp_env_consistency_check )
1332 __kmp_pop_sync( global_tid, ct_critical, loc );
1333
1334#if USE_ITT_BUILD
1335 __kmp_itt_critical_releasing( lck );
1336#endif /* USE_ITT_BUILD */
1337 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001338 __kmp_release_user_lock_with_checks( lck, global_tid );
1339
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001340#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001341 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001342 ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
1343 ompt_callbacks.ompt_callback(ompt_event_release_critical)(
1344 (uint64_t) lck);
1345 }
1346#endif
1347
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001348#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001349 KMP_POP_PARTITIONED_TIMER();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001350 KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
1351}
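
/*
   Illustrative sketch (editor-added, not part of the runtime): a named critical
   section is lowered around a statically allocated kmp_critical_name object that
   identifies the lock; the variable name below is a hypothetical placeholder for
   whatever symbol the compiler emits.

       static kmp_critical_name crit_lock;   // zero-initialized lock identity

       __kmpc_critical(&loc_stub, gtid, &crit_lock);
       critical_body();
       __kmpc_end_critical(&loc_stub, gtid, &crit_lock);
*/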
1352
1353/*!
1354@ingroup SYNCHRONIZATION
1355@param loc source location information
1356@param global_tid thread id.
1357@return one if the thread should execute the master block, zero otherwise
1358
1359Start execution of a combined barrier and master. The barrier is executed inside this function.
1360*/
1361kmp_int32
1362__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1363{
1364 int status;
1365
1366 KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1367
1368 if (! TCR_4(__kmp_init_parallel))
1369 __kmp_parallel_initialize();
1370
1371 if ( __kmp_env_consistency_check )
1372 __kmp_check_barrier( global_tid, ct_barrier, loc );
1373
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001374#if USE_ITT_NOTIFY
1375 __kmp_threads[global_tid]->th.th_ident = loc;
1376#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001377 status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1378
1379 return (status != 0) ? 0 : 1;
1380}
1381
1382/*!
1383@ingroup SYNCHRONIZATION
1384@param loc source location information
1385@param global_tid thread id.
1386
1387Complete the execution of a combined barrier and master. This function should
1388only be called at the completion of the <tt>master</tt> code. Other threads will
1389still be waiting at the barrier and this call releases them.
1390*/
1391void
1392__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1393{
1394 KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1395
1396 __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1397}
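/*
 * Illustrative sketch (not generated code): a compiler could lower a combined
 * barrier/master sequence onto these two entry points roughly as follows.
 * loc_ref and gtid stand for the usual ident_t pointer and global thread id
 * passed to every entry point; both names are placeholders.
 *
 *     if ( __kmpc_barrier_master( loc_ref, gtid ) ) {
 *         // master-only code runs here
 *         __kmpc_end_barrier_master( loc_ref, gtid );   // releases the others
 *     }
 *     // threads that received 0 stay blocked in the split barrier until the
 *     // master's __kmpc_end_barrier_master() call releases them
 */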
1398
1399/*!
1400@ingroup SYNCHRONIZATION
1401@param loc source location information
1402@param global_tid thread id.
1403@return one if the thread should execute the master block, zero otherwise
1404
1405Start execution of a combined barrier and master(nowait) construct.
1406The barrier is executed inside this function.
1407There is no equivalent "end" function, since the bookkeeping that __kmpc_end_master would otherwise perform is done inside this function.
1408*/
1409kmp_int32
1410__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1411{
1412 kmp_int32 ret;
1413
1414 KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1415
1416 if (! TCR_4(__kmp_init_parallel))
1417 __kmp_parallel_initialize();
1418
1419 if ( __kmp_env_consistency_check ) {
1420 if ( loc == 0 ) {
1421 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1422 }
1423 __kmp_check_barrier( global_tid, ct_barrier, loc );
1424 }
1425
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001426#if USE_ITT_NOTIFY
1427 __kmp_threads[global_tid]->th.th_ident = loc;
1428#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001429 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1430
1431 ret = __kmpc_master (loc, global_tid);
1432
1433 if ( __kmp_env_consistency_check ) {
1434 /* there's no __kmpc_end_master called; so the (stats) */
1435 /* actions of __kmpc_end_master are done here */
1436
1437 if ( global_tid < 0 ) {
1438 KMP_WARNING( ThreadIdentInvalid );
1439 }
1440 if (ret) {
1441 /* only one thread should do the pop since only */
1442 /* one did the push (see __kmpc_master()) */
1443
1444 __kmp_pop_sync( global_tid, ct_master, loc );
1445 }
1446 }
1447
1448 return (ret);
1449}
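/*
 * Illustrative sketch (not generated code): the nowait variant has no matching
 * "end" entry point; the consistency-check pop that __kmpc_end_master would
 * normally do is performed inside this function, so the caller only tests the
 * return value. loc_ref and gtid are placeholders as above.
 *
 *     if ( __kmpc_barrier_master_nowait( loc_ref, gtid ) ) {
 *         // master-only code; no release call follows
 *     }
 */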
1450
1451/* The BARRIER for a SINGLE process section is always explicit */
1452/*!
1453@ingroup WORK_SHARING
1454@param loc source location information
1455@param global_tid global thread number
1456@return One if this thread should execute the single construct, zero otherwise.
1457
1458Test whether to execute a <tt>single</tt> construct.
1459There are no implicit barriers in the two "single" calls; rather, the compiler should
1460introduce an explicit barrier if it is required.
1461*/
1462
1463kmp_int32
1464__kmpc_single(ident_t *loc, kmp_int32 global_tid)
1465{
1466 kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
Jonathan Peyton30138252016-03-03 21:21:05 +00001467
1468 if (rc) {
1469 // We are going to execute the single statement, so we should count it.
1470 KMP_COUNT_BLOCK(OMP_SINGLE);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001471 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001472 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001473
1474#if OMPT_SUPPORT && OMPT_TRACE
1475 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1476 kmp_team_t *team = this_thr -> th.th_team;
1477 int tid = __kmp_tid_from_gtid( global_tid );
1478
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001479 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001480 if (rc) {
1481 if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
1482 ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
1483 team->t.ompt_team_info.parallel_id,
1484 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
1485 team->t.ompt_team_info.microtask);
1486 }
1487 } else {
1488 if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
1489 ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
1490 team->t.ompt_team_info.parallel_id,
1491 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1492 }
1493 this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
1494 }
1495 }
1496#endif
1497
Jim Cownie5e8470a2013-09-27 10:38:44 +00001498 return rc;
1499}
1500
1501/*!
1502@ingroup WORK_SHARING
1503@param loc source location information
1504@param global_tid global thread number
1505
1506Mark the end of a <tt>single</tt> construct. This function should
1507only be called by the thread that executed the block of code protected
1508by the `single` construct.
1509*/
1510void
1511__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1512{
1513 __kmp_exit_single( global_tid );
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001514 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001515
1516#if OMPT_SUPPORT && OMPT_TRACE
1517 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1518 kmp_team_t *team = this_thr -> th.th_team;
1519 int tid = __kmp_tid_from_gtid( global_tid );
1520
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001521 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001522 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
1523 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
1524 team->t.ompt_team_info.parallel_id,
1525 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1526 }
1527#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001528}
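/*
 * Illustrative sketch (not generated code): a compiler could lower
 *
 *     #pragma omp single
 *     { body(); }
 *
 * roughly as shown below; body() is a placeholder. Since neither call implies
 * a barrier, any required barrier must be emitted explicitly (for example via
 * __kmpc_barrier).
 *
 *     if ( __kmpc_single( loc_ref, gtid ) ) {
 *         body();                               // executed by one thread only
 *         __kmpc_end_single( loc_ref, gtid );   // called only by that thread
 *     }
 */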
1529
1530/*!
1531@ingroup WORK_SHARING
1532@param loc Source location
1533@param global_tid Global thread id
1534
1535Mark the end of a statically scheduled loop.
1536*/
1537void
1538__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1539{
1540 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1541
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001542#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001543 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001544 ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
Jonas Hahnfelde46a4942016-03-24 12:52:20 +00001545 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1546 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001547 ompt_callbacks.ompt_callback(ompt_event_loop_end)(
Jonas Hahnfelde46a4942016-03-24 12:52:20 +00001548 team_info->parallel_id, task_info->task_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001549 }
1550#endif
1551
Jim Cownie5e8470a2013-09-27 10:38:44 +00001552 if ( __kmp_env_consistency_check )
1553 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1554}
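/*
 * Illustrative sketch (not generated code): __kmpc_for_static_fini pairs with
 * the __kmpc_for_static_init_* entry points implemented elsewhere in the
 * runtime. A statically scheduled loop is typically lowered along these lines;
 * all argument names are schematic and the init signature is quoted from
 * memory rather than from this file.
 *
 *     __kmpc_for_static_init_4( loc_ref, gtid, schedule, &last,
 *                               &lower, &upper, &stride, incr, chunk );
 *     for ( i = lower; i <= upper; i += incr )
 *         loop_body( i );
 *     __kmpc_for_static_fini( loc_ref, gtid );  // must follow every init call
 */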
1555
1556/*
1557 * User routines which take C-style arguments (call by value)
1558 * different from the Fortran equivalent routines
1559 */
1560
1561void
1562ompc_set_num_threads( int arg )
1563{
1564// !!!!! TODO: check the per-task binding
1565 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1566}
1567
1568void
1569ompc_set_dynamic( int flag )
1570{
1571 kmp_info_t *thread;
1572
1573 /* For the thread-private implementation of the internal controls */
1574 thread = __kmp_entry_thread();
1575
1576 __kmp_save_internal_controls( thread );
1577
1578 set__dynamic( thread, flag ? TRUE : FALSE );
1579}
1580
1581void
1582ompc_set_nested( int flag )
1583{
1584 kmp_info_t *thread;
1585
1586 /* For the thread-private internal controls implementation */
1587 thread = __kmp_entry_thread();
1588
1589 __kmp_save_internal_controls( thread );
1590
1591 set__nested( thread, flag ? TRUE : FALSE );
1592}
1593
Jim Cownie5e8470a2013-09-27 10:38:44 +00001594void
1595ompc_set_max_active_levels( int max_active_levels )
1596{
1597 /* TO DO */
1598 /* we want per-task implementation of this internal control */
1599
1600 /* For the per-thread internal controls implementation */
1601 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1602}
1603
1604void
1605ompc_set_schedule( omp_sched_t kind, int modifier )
1606{
1607// !!!!! TODO: check the per-task binding
1608 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1609}
1610
1611int
1612ompc_get_ancestor_thread_num( int level )
1613{
1614 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1615}
1616
1617int
1618ompc_get_team_size( int level )
1619{
1620 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1621}
1622
Jim Cownie5e8470a2013-09-27 10:38:44 +00001623void
1624kmpc_set_stacksize( int arg )
1625{
1626 // __kmp_aux_set_stacksize initializes the library if needed
1627 __kmp_aux_set_stacksize( arg );
1628}
1629
1630void
1631kmpc_set_stacksize_s( size_t arg )
1632{
1633 // __kmp_aux_set_stacksize initializes the library if needed
1634 __kmp_aux_set_stacksize( arg );
1635}
1636
1637void
1638kmpc_set_blocktime( int arg )
1639{
1640 int gtid, tid;
1641 kmp_info_t *thread;
1642
1643 gtid = __kmp_entry_gtid();
1644 tid = __kmp_tid_from_gtid(gtid);
1645 thread = __kmp_thread_from_gtid(gtid);
1646
1647 __kmp_aux_set_blocktime( arg, thread, tid );
1648}
1649
1650void
1651kmpc_set_library( int arg )
1652{
1653 // __kmp_user_set_library initializes the library if needed
1654 __kmp_user_set_library( (enum library_type)arg );
1655}
1656
1657void
1658kmpc_set_defaults( char const * str )
1659{
1660 // __kmp_aux_set_defaults initializes the library if needed
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001661 __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001662}
1663
Jonathan Peyton067325f2016-05-31 19:01:15 +00001664void
1665kmpc_set_disp_num_buffers( int arg )
1666{
1667 // ignore after initialization because some teams have already
1668 // allocated dispatch buffers
1669 if( __kmp_init_serial == 0 && arg > 0 )
1670 __kmp_dispatch_num_buffers = arg;
1671}
1672
Jim Cownie5e8470a2013-09-27 10:38:44 +00001673int
1674kmpc_set_affinity_mask_proc( int proc, void **mask )
1675{
Alp Toker98758b02014-03-02 04:12:06 +00001676#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001677 return -1;
1678#else
1679 if ( ! TCR_4(__kmp_init_middle) ) {
1680 __kmp_middle_initialize();
1681 }
1682 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1683#endif
1684}
1685
1686int
1687kmpc_unset_affinity_mask_proc( int proc, void **mask )
1688{
Alp Toker98758b02014-03-02 04:12:06 +00001689#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001690 return -1;
1691#else
1692 if ( ! TCR_4(__kmp_init_middle) ) {
1693 __kmp_middle_initialize();
1694 }
1695 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1696#endif
1697}
1698
1699int
1700kmpc_get_affinity_mask_proc( int proc, void **mask )
1701{
Alp Toker98758b02014-03-02 04:12:06 +00001702#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001703 return -1;
1704#else
1705 if ( ! TCR_4(__kmp_init_middle) ) {
1706 __kmp_middle_initialize();
1707 }
1708 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1709#endif
1710}
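/*
 * Illustrative sketch (not generated code): these three entry points mirror
 * the kmp_{set,unset,get}_affinity_mask_proc() extensions declared in omp.h.
 * Assuming that kmp_* affinity API, a user-level sequence looks roughly like:
 *
 *     kmp_affinity_mask_t mask;
 *     kmp_create_affinity_mask( &mask );
 *     kmp_set_affinity_mask_proc( 3, &mask );   // add logical processor 3
 *     if ( kmp_set_affinity( &mask ) != 0 ) {
 *         // affinity not supported or mask invalid; handle as appropriate
 *     }
 *     kmp_destroy_affinity_mask( &mask );
 */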
1711
Jim Cownie5e8470a2013-09-27 10:38:44 +00001712
1713/* -------------------------------------------------------------------------- */
1714/*!
1715@ingroup THREADPRIVATE
1716@param loc source location information
1717@param gtid global thread number
1718@param cpy_size size of the cpy_data buffer
1719@param cpy_data pointer to data to be copied
1720@param cpy_func helper function to call for copying data
1721@param didit flag variable: 1=single thread; 0=not single thread
1722
1723__kmpc_copyprivate implements the interface for the private data broadcast needed for
1724the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1725All threads participating in the parallel region call this routine.
1726One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1727and all other threads should have that variable set to 0.
1728All threads pass a pointer to a data buffer (cpy_data) that they have built.
1729
1730The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1731clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1732race conditions, so the code generation for the single region should avoid generating a barrier
1733after the call to @ref __kmpc_copyprivate.
1734
1735The <tt>gtid</tt> parameter is the global thread id for the current thread.
1736The <tt>loc</tt> parameter is a pointer to source location information.
1737
1738Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1739to a team-private location, then the other threads will each call the function pointed to by
1740the parameter cpy_func, which carries out the copy using the cpy_data buffer.
1741
1742The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1743and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1744the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1745to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1746
1747The interface to cpy_func is as follows:
1748@code
1749void cpy_func( void *destination, void *source )
1750@endcode
1751where void *destination is the cpy_data pointer for the thread being copied to
1752and void *source is the cpy_data pointer for the thread being copied from.
1753*/
1754void
1755__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1756{
1757 void **data_ptr;
1758
1759 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1760
1761 KMP_MB();
1762
1763 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1764
1765 if ( __kmp_env_consistency_check ) {
1766 if ( loc == 0 ) {
1767 KMP_WARNING( ConstructIdentInvalid );
1768 }
1769 }
1770
1771 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1772
1773 if (didit) *data_ptr = cpy_data;
1774
1775 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001776#if USE_ITT_NOTIFY
1777 __kmp_threads[gtid]->th.th_ident = loc;
1778#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001779 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1780
1781 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1782
1783 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1784 /* Nesting checks are already handled by the single construct checks */
1785
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001786#if USE_ITT_NOTIFY
1787 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1788#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001789 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1790}
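/*
 * Illustrative sketch (not generated code): for
 *
 *     #pragma omp single copyprivate(x)
 *
 * a compiler could emit roughly the following, where copy_x() is a
 * hypothetical helper playing the role of cpy_func and x is an int private
 * to each thread.
 *
 *     static void copy_x( void *dst, void *src ) {
 *         *(int *)dst = *(int *)src;            // copy the broadcast value
 *     }
 *     ...
 *     kmp_int32 didit = __kmpc_single( loc_ref, gtid );
 *     if ( didit ) {
 *         x = ...;                              // the single thread produces x
 *         __kmpc_end_single( loc_ref, gtid );
 *     }
 *     __kmpc_copyprivate( loc_ref, gtid, sizeof(x), &x, copy_x, didit );
 *     // no extra barrier is emitted here: __kmpc_copyprivate barriers internally
 */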
1791
1792/* -------------------------------------------------------------------------- */
1793
1794#define INIT_LOCK __kmp_init_user_lock_with_checks
1795#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1796#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1797#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1798#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1799#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1800#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1801#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1802#define TEST_LOCK __kmp_test_user_lock_with_checks
1803#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1804#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1805#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1806
1807
1808/*
1809 * TODO: Make check abort messages use location info & pass it
1810 * into with_checks routines
1811 */
1812
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001813#if KMP_USE_DYNAMIC_LOCK
1814
1815// internal lock initializer
1816static __forceinline void
1817__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1818{
1819 if (KMP_IS_D_LOCK(seq)) {
1820 KMP_INIT_D_LOCK(lock, seq);
1821#if USE_ITT_BUILD
1822 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
1823#endif
1824 } else {
1825 KMP_INIT_I_LOCK(lock, seq);
1826#if USE_ITT_BUILD
1827 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1828 __kmp_itt_lock_creating(ilk->lock, loc);
1829#endif
1830 }
1831}
1832
1833// internal nest lock initializer
1834static __forceinline void
1835__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1836{
1837#if KMP_USE_TSX
1838 // Don't have nested lock implementation for speculative locks
1839 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
1840 seq = __kmp_user_lock_seq;
1841#endif
1842 switch (seq) {
1843 case lockseq_tas:
1844 seq = lockseq_nested_tas;
1845 break;
1846#if KMP_USE_FUTEX
1847 case lockseq_futex:
1848 seq = lockseq_nested_futex;
1849 break;
1850#endif
1851 case lockseq_ticket:
1852 seq = lockseq_nested_ticket;
1853 break;
1854 case lockseq_queuing:
1855 seq = lockseq_nested_queuing;
1856 break;
1857 case lockseq_drdpa:
1858 seq = lockseq_nested_drdpa;
1859 break;
1860 default:
1861 seq = lockseq_nested_queuing;
1862 }
1863 KMP_INIT_I_LOCK(lock, seq);
1864#if USE_ITT_BUILD
1865 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1866 __kmp_itt_lock_creating(ilk->lock, loc);
1867#endif
1868}
1869
1870/* initialize the lock with a hint */
1871void
1872__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1873{
1874 KMP_DEBUG_ASSERT(__kmp_init_serial);
1875 if (__kmp_env_consistency_check && user_lock == NULL) {
1876 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
1877 }
1878
1879 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1880}
1881
1882/* initialize the lock with a hint */
1883void
1884__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1885{
1886 KMP_DEBUG_ASSERT(__kmp_init_serial);
1887 if (__kmp_env_consistency_check && user_lock == NULL) {
1888 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
1889 }
1890
1891 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1892}
1893
1894#endif // KMP_USE_DYNAMIC_LOCK
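/*
 * Illustrative sketch (not generated code): the two with-hint entry points
 * above correspond to the OpenMP 4.5 lock-hint API. Assuming the standard
 * omp.h hint constants, user code looks roughly like:
 *
 *     omp_lock_t l;
 *     omp_init_lock_with_hint( &l, omp_lock_hint_speculative );
 *     omp_set_lock( &l );
 *     // ... critical work ...
 *     omp_unset_lock( &l );
 *     omp_destroy_lock( &l );
 */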
1895
Jim Cownie5e8470a2013-09-27 10:38:44 +00001896/* initialize the lock */
1897void
1898__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001899#if KMP_USE_DYNAMIC_LOCK
1900 KMP_DEBUG_ASSERT(__kmp_init_serial);
1901 if (__kmp_env_consistency_check && user_lock == NULL) {
1902 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1903 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001904 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001905
1906#else // KMP_USE_DYNAMIC_LOCK
1907
Jim Cownie5e8470a2013-09-27 10:38:44 +00001908 static char const * const func = "omp_init_lock";
1909 kmp_user_lock_p lck;
1910 KMP_DEBUG_ASSERT( __kmp_init_serial );
1911
1912 if ( __kmp_env_consistency_check ) {
1913 if ( user_lock == NULL ) {
1914 KMP_FATAL( LockIsUninitialized, func );
1915 }
1916 }
1917
1918 KMP_CHECK_USER_LOCK_INIT();
1919
1920 if ( ( __kmp_user_lock_kind == lk_tas )
1921 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1922 lck = (kmp_user_lock_p)user_lock;
1923 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001924#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001925 else if ( ( __kmp_user_lock_kind == lk_futex )
1926 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1927 lck = (kmp_user_lock_p)user_lock;
1928 }
1929#endif
1930 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001931 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932 }
1933 INIT_LOCK( lck );
1934 __kmp_set_user_lock_location( lck, loc );
1935
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001936#if OMPT_SUPPORT && OMPT_TRACE
1937 if (ompt_enabled &&
1938 ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
1939 ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
1940 }
1941#endif
1942
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943#if USE_ITT_BUILD
1944 __kmp_itt_lock_creating( lck );
1945#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001946
1947#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948} // __kmpc_init_lock
1949
1950/* initialize the lock */
1951void
1952__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001953#if KMP_USE_DYNAMIC_LOCK
1954
1955 KMP_DEBUG_ASSERT(__kmp_init_serial);
1956 if (__kmp_env_consistency_check && user_lock == NULL) {
1957 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1958 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001959 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001960
1961#else // KMP_USE_DYNAMIC_LOCK
1962
Jim Cownie5e8470a2013-09-27 10:38:44 +00001963 static char const * const func = "omp_init_nest_lock";
1964 kmp_user_lock_p lck;
1965 KMP_DEBUG_ASSERT( __kmp_init_serial );
1966
1967 if ( __kmp_env_consistency_check ) {
1968 if ( user_lock == NULL ) {
1969 KMP_FATAL( LockIsUninitialized, func );
1970 }
1971 }
1972
1973 KMP_CHECK_USER_LOCK_INIT();
1974
1975 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1976 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1977 lck = (kmp_user_lock_p)user_lock;
1978 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001979#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001980 else if ( ( __kmp_user_lock_kind == lk_futex )
1981 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1982 <= OMP_NEST_LOCK_T_SIZE ) ) {
1983 lck = (kmp_user_lock_p)user_lock;
1984 }
1985#endif
1986 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001987 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988 }
1989
1990 INIT_NESTED_LOCK( lck );
1991 __kmp_set_user_lock_location( lck, loc );
1992
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001993#if OMPT_SUPPORT && OMPT_TRACE
1994 if (ompt_enabled &&
1995 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
1996 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
1997 }
1998#endif
1999
Jim Cownie5e8470a2013-09-27 10:38:44 +00002000#if USE_ITT_BUILD
2001 __kmp_itt_lock_creating( lck );
2002#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002003
2004#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005} // __kmpc_init_nest_lock
2006
2007void
2008__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002009#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002011# if USE_ITT_BUILD
2012 kmp_user_lock_p lck;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002013 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2014 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002015 } else {
2016 lck = (kmp_user_lock_p)user_lock;
2017 }
2018 __kmp_itt_lock_destroyed(lck);
2019# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002020 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002021#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022 kmp_user_lock_p lck;
2023
2024 if ( ( __kmp_user_lock_kind == lk_tas )
2025 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2026 lck = (kmp_user_lock_p)user_lock;
2027 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002028#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002029 else if ( ( __kmp_user_lock_kind == lk_futex )
2030 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2031 lck = (kmp_user_lock_p)user_lock;
2032 }
2033#endif
2034 else {
2035 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
2036 }
2037
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002038#if OMPT_SUPPORT && OMPT_TRACE
2039 if (ompt_enabled &&
2040 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
2041 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
2042 }
2043#endif
2044
Jim Cownie5e8470a2013-09-27 10:38:44 +00002045#if USE_ITT_BUILD
2046 __kmp_itt_lock_destroyed( lck );
2047#endif /* USE_ITT_BUILD */
2048 DESTROY_LOCK( lck );
2049
2050 if ( ( __kmp_user_lock_kind == lk_tas )
2051 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2052 ;
2053 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002054#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002055 else if ( ( __kmp_user_lock_kind == lk_futex )
2056 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2057 ;
2058 }
2059#endif
2060 else {
2061 __kmp_user_lock_free( user_lock, gtid, lck );
2062 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002063#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002064} // __kmpc_destroy_lock
2065
2066/* destroy the lock */
2067void
2068__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002069#if KMP_USE_DYNAMIC_LOCK
2070
2071# if USE_ITT_BUILD
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002072 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002073 __kmp_itt_lock_destroyed(ilk->lock);
2074# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002075 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002076
2077#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078
2079 kmp_user_lock_p lck;
2080
2081 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2082 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2083 lck = (kmp_user_lock_p)user_lock;
2084 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002085#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002086 else if ( ( __kmp_user_lock_kind == lk_futex )
2087 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2088 <= OMP_NEST_LOCK_T_SIZE ) ) {
2089 lck = (kmp_user_lock_p)user_lock;
2090 }
2091#endif
2092 else {
2093 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
2094 }
2095
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002096#if OMPT_SUPPORT && OMPT_TRACE
2097 if (ompt_enabled &&
2098 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
2099 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
2100 }
2101#endif
2102
Jim Cownie5e8470a2013-09-27 10:38:44 +00002103#if USE_ITT_BUILD
2104 __kmp_itt_lock_destroyed( lck );
2105#endif /* USE_ITT_BUILD */
2106
2107 DESTROY_NESTED_LOCK( lck );
2108
2109 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2110 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2111 ;
2112 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002113#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002114 else if ( ( __kmp_user_lock_kind == lk_futex )
2115 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2116 <= OMP_NEST_LOCK_T_SIZE ) ) {
2117 ;
2118 }
2119#endif
2120 else {
2121 __kmp_user_lock_free( user_lock, gtid, lck );
2122 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002123#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002124} // __kmpc_destroy_nest_lock
2125
2126void
2127__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002128 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002129#if KMP_USE_DYNAMIC_LOCK
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002130 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002131# if USE_ITT_BUILD
2132 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
2133# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002134# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002135 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002136 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002137 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002138# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002139 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002140 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002141 } else
2142# endif
2143 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002144 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002145 }
2146# if USE_ITT_BUILD
2147 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2148# endif
2149
2150#else // KMP_USE_DYNAMIC_LOCK
2151
Jim Cownie5e8470a2013-09-27 10:38:44 +00002152 kmp_user_lock_p lck;
2153
2154 if ( ( __kmp_user_lock_kind == lk_tas )
2155 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2156 lck = (kmp_user_lock_p)user_lock;
2157 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002158#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002159 else if ( ( __kmp_user_lock_kind == lk_futex )
2160 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2161 lck = (kmp_user_lock_p)user_lock;
2162 }
2163#endif
2164 else {
2165 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
2166 }
2167
2168#if USE_ITT_BUILD
2169 __kmp_itt_lock_acquiring( lck );
2170#endif /* USE_ITT_BUILD */
2171
2172 ACQUIRE_LOCK( lck, gtid );
2173
2174#if USE_ITT_BUILD
2175 __kmp_itt_lock_acquired( lck );
2176#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002177
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002178#if OMPT_SUPPORT && OMPT_TRACE
2179 if (ompt_enabled &&
2180 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
2181 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
2182 }
2183#endif
2184
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002185#endif // KMP_USE_DYNAMIC_LOCK
2186}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002187
2188void
2189__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002190#if KMP_USE_DYNAMIC_LOCK
2191
2192# if USE_ITT_BUILD
2193 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2194# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002195 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002196# if USE_ITT_BUILD
2197 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2198#endif
2199
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002200#if OMPT_SUPPORT && OMPT_TRACE
2201 if (ompt_enabled) {
2202 // missing support here: need to know whether acquired first or not
2203 }
2204#endif
2205
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002206#else // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002207 int acquire_status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002208 kmp_user_lock_p lck;
2209
2210 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2211 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2212 lck = (kmp_user_lock_p)user_lock;
2213 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002214#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002215 else if ( ( __kmp_user_lock_kind == lk_futex )
2216 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2217 <= OMP_NEST_LOCK_T_SIZE ) ) {
2218 lck = (kmp_user_lock_p)user_lock;
2219 }
2220#endif
2221 else {
2222 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2223 }
2224
2225#if USE_ITT_BUILD
2226 __kmp_itt_lock_acquiring( lck );
2227#endif /* USE_ITT_BUILD */
2228
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002229 ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002230
2231#if USE_ITT_BUILD
2232 __kmp_itt_lock_acquired( lck );
2233#endif /* USE_ITT_BUILD */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002234
2235#if OMPT_SUPPORT && OMPT_TRACE
2236 if (ompt_enabled) {
2237 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2238 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
2239 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
2240 } else {
2241 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
2242 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
2243 }
2244 }
2245#endif
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002246
2247#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002248}
2249
2250void
2251__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2252{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002253#if KMP_USE_DYNAMIC_LOCK
2254
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002255 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002256# if USE_ITT_BUILD
2257 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2258# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002259# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002260 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002261 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002262 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002263# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002264 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002265 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002266 } else
2267# endif
2268 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002269 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002270 }
2271
2272#else // KMP_USE_DYNAMIC_LOCK
2273
Jim Cownie5e8470a2013-09-27 10:38:44 +00002274 kmp_user_lock_p lck;
2275
2276 /* Can't use serial interval since not block structured */
2277 /* release the lock */
2278
2279 if ( ( __kmp_user_lock_kind == lk_tas )
2280 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002281#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002282 // "fast" path implemented to fix customer performance issue
2283#if USE_ITT_BUILD
2284 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2285#endif /* USE_ITT_BUILD */
2286 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2287 KMP_MB();
2288 return;
2289#else
2290 lck = (kmp_user_lock_p)user_lock;
2291#endif
2292 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002293#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002294 else if ( ( __kmp_user_lock_kind == lk_futex )
2295 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2296 lck = (kmp_user_lock_p)user_lock;
2297 }
2298#endif
2299 else {
2300 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2301 }
2302
2303#if USE_ITT_BUILD
2304 __kmp_itt_lock_releasing( lck );
2305#endif /* USE_ITT_BUILD */
2306
2307 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002308
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002309#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002310 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002311 ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2312 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2313 }
2314#endif
2315
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002316#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002317}
2318
2319/* release the lock */
2320void
2321__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2322{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002323#if KMP_USE_DYNAMIC_LOCK
2324
2325# if USE_ITT_BUILD
2326 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2327# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002328 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002329
2330#else // KMP_USE_DYNAMIC_LOCK
2331
Jim Cownie5e8470a2013-09-27 10:38:44 +00002332 kmp_user_lock_p lck;
2333
2334 /* Can't use serial interval since not block structured */
2335
2336 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2337 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002338#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002339 // "fast" path implemented to fix customer performance issue
2340 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2341#if USE_ITT_BUILD
2342 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2343#endif /* USE_ITT_BUILD */
2344 if ( --(tl->lk.depth_locked) == 0 ) {
2345 TCW_4(tl->lk.poll, 0);
2346 }
2347 KMP_MB();
2348 return;
2349#else
2350 lck = (kmp_user_lock_p)user_lock;
2351#endif
2352 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002353#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354 else if ( ( __kmp_user_lock_kind == lk_futex )
2355 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2356 <= OMP_NEST_LOCK_T_SIZE ) ) {
2357 lck = (kmp_user_lock_p)user_lock;
2358 }
2359#endif
2360 else {
2361 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2362 }
2363
2364#if USE_ITT_BUILD
2365 __kmp_itt_lock_releasing( lck );
2366#endif /* USE_ITT_BUILD */
2367
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002368 int release_status;
2369 release_status = RELEASE_NESTED_LOCK( lck, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002370#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002371 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002372 if (release_status == KMP_LOCK_RELEASED) {
2373 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2374 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2375 (uint64_t) lck);
2376 }
2377 } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2378 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2379 (uint64_t) lck);
2380 }
2381 }
2382#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002383
2384#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002385}
2386
2387/* try to acquire the lock */
2388int
2389__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2390{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002391 KMP_COUNT_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002392
2393#if KMP_USE_DYNAMIC_LOCK
2394 int rc;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002395 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002396# if USE_ITT_BUILD
Jonathan Peyton81f9cd12015-05-22 22:37:22 +00002397 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002398# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002399# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002400 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002401 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002402 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002403# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002404 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002405 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002406 } else
2407# endif
2408 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002409 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002410 }
2411 if (rc) {
2412# if USE_ITT_BUILD
2413 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2414# endif
2415 return FTN_TRUE;
2416 } else {
2417# if USE_ITT_BUILD
2418 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2419# endif
2420 return FTN_FALSE;
2421 }
2422
2423#else // KMP_USE_DYNAMIC_LOCK
2424
Jim Cownie5e8470a2013-09-27 10:38:44 +00002425 kmp_user_lock_p lck;
2426 int rc;
2427
2428 if ( ( __kmp_user_lock_kind == lk_tas )
2429 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2430 lck = (kmp_user_lock_p)user_lock;
2431 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002432#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002433 else if ( ( __kmp_user_lock_kind == lk_futex )
2434 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2435 lck = (kmp_user_lock_p)user_lock;
2436 }
2437#endif
2438 else {
2439 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2440 }
2441
2442#if USE_ITT_BUILD
2443 __kmp_itt_lock_acquiring( lck );
2444#endif /* USE_ITT_BUILD */
2445
2446 rc = TEST_LOCK( lck, gtid );
2447#if USE_ITT_BUILD
2448 if ( rc ) {
2449 __kmp_itt_lock_acquired( lck );
2450 } else {
2451 __kmp_itt_lock_cancelled( lck );
2452 }
2453#endif /* USE_ITT_BUILD */
2454 return ( rc ? FTN_TRUE : FTN_FALSE );
2455
2456 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002457
2458#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002459}
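/*
 * Illustrative sketch (not generated code): __kmpc_set_lock, __kmpc_unset_lock
 * and __kmpc_test_lock are the compiler-facing counterparts of the user-level
 * omp_*_lock() API. At source level the same semantics read roughly as:
 *
 *     omp_lock_t l;
 *     omp_init_lock( &l );
 *     if ( omp_test_lock( &l ) ) {              // non-blocking attempt
 *         // ... work while holding the lock ...
 *         omp_unset_lock( &l );
 *     } else {
 *         omp_set_lock( &l );                   // blocking acquire
 *         // ... work ...
 *         omp_unset_lock( &l );
 *     }
 *     omp_destroy_lock( &l );
 */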
2460
2461/* try to acquire the lock */
2462int
2463__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2464{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002465#if KMP_USE_DYNAMIC_LOCK
2466 int rc;
2467# if USE_ITT_BUILD
2468 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2469# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002470 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002471# if USE_ITT_BUILD
2472 if (rc) {
2473 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2474 } else {
2475 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2476 }
2477# endif
2478 return rc;
2479
2480#else // KMP_USE_DYNAMIC_LOCK
2481
Jim Cownie5e8470a2013-09-27 10:38:44 +00002482 kmp_user_lock_p lck;
2483 int rc;
2484
2485 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2486 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2487 lck = (kmp_user_lock_p)user_lock;
2488 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002489#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002490 else if ( ( __kmp_user_lock_kind == lk_futex )
2491 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2492 <= OMP_NEST_LOCK_T_SIZE ) ) {
2493 lck = (kmp_user_lock_p)user_lock;
2494 }
2495#endif
2496 else {
2497 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2498 }
2499
2500#if USE_ITT_BUILD
2501 __kmp_itt_lock_acquiring( lck );
2502#endif /* USE_ITT_BUILD */
2503
2504 rc = TEST_NESTED_LOCK( lck, gtid );
2505#if USE_ITT_BUILD
2506 if ( rc ) {
2507 __kmp_itt_lock_acquired( lck );
2508 } else {
2509 __kmp_itt_lock_cancelled( lck );
2510 }
2511#endif /* USE_ITT_BUILD */
2512 return rc;
2513
2514 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002515
2516#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002517}
2518
2519
2520/*--------------------------------------------------------------------------------------------------------------------*/
2521
2522/*
2523 * Interface to fast scalable reduce methods routines
2524 */
2525
2526// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions;
2527// another solution: to re-determine the method one more time in __kmpc_end_reduce* functions (new prototype required then)
2528// AT: which solution is better?
2529#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2530 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2531
2532#define __KMP_GET_REDUCTION_METHOD(gtid) \
2533 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2534
2535// description of the packed_reduction_method variable: look at the macros in kmp.h
2536
2537
2538// used in a critical section reduce block
2539static __forceinline void
2540__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2541
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002542 // this lock was visible to a customer and to the threading profile tool as a serial overhead span
Jim Cownie5e8470a2013-09-27 10:38:44 +00002543 // (although it's used for an internal purpose only)
2544 // why was it visible in previous implementation?
2545 // should we keep it visible in new reduce block?
2546 kmp_user_lock_p lck;
2547
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002548#if KMP_USE_DYNAMIC_LOCK
2549
Jonathan Peytondae13d82015-12-11 21:57:06 +00002550 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
2551 // Check if it is initialized.
2552 if (*lk == 0) {
2553 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
2554 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
2555 } else {
2556 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002557 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002558 }
2559 // Branch for accessing the actual lock object and set operation. This branching is inevitable since
2560 // this lock initialization does not follow the normal dispatch path (lock table is not used).
2561 if (KMP_EXTRACT_D_TAG(lk) != 0) {
2562 lck = (kmp_user_lock_p)lk;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002563 KMP_DEBUG_ASSERT(lck != NULL);
2564 if (__kmp_env_consistency_check) {
2565 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2566 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002567 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002568 } else {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002569 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
2570 lck = ilk->lock;
2571 KMP_DEBUG_ASSERT(lck != NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002572 if (__kmp_env_consistency_check) {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002573 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002574 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002575 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002576 }
2577
2578#else // KMP_USE_DYNAMIC_LOCK
2579
Jim Cownie5e8470a2013-09-27 10:38:44 +00002580 // We know that the fast reduction code is only emitted by Intel compilers
2581 // with 32 byte critical sections. If there isn't enough space, then we
2582 // have to use a pointer.
2583 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2584 lck = (kmp_user_lock_p)crit;
2585 }
2586 else {
2587 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2588 }
2589 KMP_DEBUG_ASSERT( lck != NULL );
2590
2591 if ( __kmp_env_consistency_check )
2592 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2593
2594 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002595
2596#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002597}
2598
2599// used in a critical section reduce block
2600static __forceinline void
2601__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2602
2603 kmp_user_lock_p lck;
2604
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002605#if KMP_USE_DYNAMIC_LOCK
2606
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002607 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002608 lck = (kmp_user_lock_p)crit;
2609 if (__kmp_env_consistency_check)
2610 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002611 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002612 } else {
2613 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2614 if (__kmp_env_consistency_check)
2615 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002616 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002617 }
2618
2619#else // KMP_USE_DYNAMIC_LOCK
2620
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2622 // sections. If there isn't enough space, then we have to use a pointer.
2623 if ( __kmp_base_user_lock_size > 32 ) {
2624 lck = *( (kmp_user_lock_p *) crit );
2625 KMP_ASSERT( lck != NULL );
2626 } else {
2627 lck = (kmp_user_lock_p) crit;
2628 }
2629
2630 if ( __kmp_env_consistency_check )
2631 __kmp_pop_sync( global_tid, ct_critical, loc );
2632
2633 __kmp_release_user_lock_with_checks( lck, global_tid );
2634
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002635#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002636} // __kmp_end_critical_section_reduce_block
2637
2638
2639/* 2.a.i. Reduce Block without a terminating barrier */
2640/*!
2641@ingroup SYNCHRONIZATION
2642@param loc source location information
2643@param global_tid global thread number
2644@param num_vars number of items (variables) to be reduced
2645@param reduce_size size of data in bytes to be reduced
2646@param reduce_data pointer to data to be reduced
2647@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2648@param lck pointer to the unique lock data structure
2649@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2650
2651The nowait version is used for a reduce clause with the nowait argument.
2652*/
2653kmp_int32
2654__kmpc_reduce_nowait(
2655 ident_t *loc, kmp_int32 global_tid,
2656 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2657 kmp_critical_name *lck ) {
2658
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002659 KMP_COUNT_BLOCK(REDUCE_nowait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002660 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002661 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002662#if OMP_40_ENABLED
2663 kmp_team_t *team;
2664 kmp_info_t *th;
2665 int teams_swapped = 0, task_state;
2666#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002667 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2668
2669 // why do we need this initialization here at all?
2670 // Reduction clause can not be used as a stand-alone directive.
2671
2672 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2673 // possible detection of false-positive race by the threadchecker ???
2674 if( ! TCR_4( __kmp_init_parallel ) )
2675 __kmp_parallel_initialize();
2676
2677 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002678#if KMP_USE_DYNAMIC_LOCK
2679 if ( __kmp_env_consistency_check )
2680 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2681#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002682 if ( __kmp_env_consistency_check )
2683 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002684#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002685
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002686#if OMP_40_ENABLED
2687 th = __kmp_thread_from_gtid(global_tid);
2688 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2689 team = th->th.th_team;
2690 if( team->t.t_level == th->th.th_teams_level ) {
2691 // this is reduction at teams construct
2692 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2693 // Let's swap teams temporarily for the reduction barrier
2694 teams_swapped = 1;
2695 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2696 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002697 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002698 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002699 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002700 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002701 }
2702 }
2703#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002704
2705 // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable
2706 // the variable should be either a construct-specific or thread-specific property, not a team specific property
2707 // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct)
2708 // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
2709 // (if both construct-specific and team-specific variables were shared, then unnecessary extra syncs would be needed)
2710 // a thread-specific variable is better regarding two issues above (next construct and extra syncs)
2711 // a thread-specific "th_local.reduction_method" variable is used currently
2712 // each thread executes the 'determine' and 'set' lines (no need to have a single thread execute them, which avoids unnecessary extra syncs)
2713
2714 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2715 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2716
2717 if( packed_reduction_method == critical_reduce_block ) {
2718
2719 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2720 retval = 1;
2721
2722 } else if( packed_reduction_method == empty_reduce_block ) {
2723
2724 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2725 retval = 1;
2726
2727 } else if( packed_reduction_method == atomic_reduce_block ) {
2728
2729 retval = 2;
2730
2731 // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2732 // (it's not quite good, because the checking block has been closed by this 'pop',
2733 // but atomic operation has not been executed yet, will be executed slightly later, literally on next instruction)
2734 if ( __kmp_env_consistency_check )
2735 __kmp_pop_sync( global_tid, ct_reduce, loc );
2736
2737 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2738
2739 //AT: performance issue: a real barrier here
2740 //AT: (if master goes slow, other threads are blocked here waiting for the master to come and release them)
2741 //AT: (it's not what a customer might expect specifying NOWAIT clause)
2742 //AT: (specifying NOWAIT won't result in improvement of performance, it'll be confusing to a customer)
2743 //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
2744 // and be more in line with sense of NOWAIT
2745 //AT: TO DO: do epcc test and compare times
2746
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002747 // this barrier should be invisible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002748 // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002749#if USE_ITT_NOTIFY
2750 __kmp_threads[global_tid]->th.th_ident = loc;
2751#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002752 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2753 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2754
2755 // all other workers except master should do this pop here
2756 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
2757 if ( __kmp_env_consistency_check ) {
2758 if( retval == 0 ) {
2759 __kmp_pop_sync( global_tid, ct_reduce, loc );
2760 }
2761 }
2762
2763 } else {
2764
2765 // should never reach this block
2766 KMP_ASSERT( 0 ); // "unexpected method"
2767
2768 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002769#if OMP_40_ENABLED
2770 if( teams_swapped ) {
2771 // Restore thread structure
2772 th->th.th_info.ds.ds_tid = 0;
2773 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002774 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002775 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002776 th->th.th_task_state = task_state;
2777 }
2778#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2780
2781 return retval;
2782}
2783
2784/*!
2785@ingroup SYNCHRONIZATION
2786@param loc source location information
2787@param global_tid global thread id.
2788@param lck pointer to the unique lock data structure
2789
2790Finish the execution of a reduce nowait.
2791*/
2792void
2793__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2794
2795 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2796
2797 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2798
2799 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2800
2801 if( packed_reduction_method == critical_reduce_block ) {
2802
2803 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2804
2805 } else if( packed_reduction_method == empty_reduce_block ) {
2806
2807 // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2808
2809 } else if( packed_reduction_method == atomic_reduce_block ) {
2810
2811 // neither the master nor the other workers should get here
2812 // (code gen does not generate this call in case 2: atomic reduce block)
2813 // actually it would be better to remove this else-if entirely;
2814 // after removal this value would be checked by the 'else' branch and would assert
2815
2816 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2817
2818 // only master gets here
2819
2820 } else {
2821
2822 // should never reach this block
2823 KMP_ASSERT( 0 ); // "unexpected method"
2824
2825 }
2826
2827 if ( __kmp_env_consistency_check )
2828 __kmp_pop_sync( global_tid, ct_reduce, loc );
2829
2830 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2831
2832 return;
2833}
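
// Illustrative sketch, not part of the runtime and not the exact code any
// particular compiler emits: roughly how a nowait reduction such as
//     #pragma omp for reduction(+:sum) nowait
// may be lowered onto __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait().
// The names my_loc, my_lock, my_reduce_func, priv and shared_sum are hypothetical.
//
//     static void my_reduce_func( void *lhs_data, void *rhs_data ) {
//         *(double *)lhs_data += *(double *)rhs_data;      // combine two partial sums
//     }
//     static kmp_critical_name my_lock;                    // zero-initialized, shared by the team
//
//     double priv = local_partial_sum;
//     switch( __kmpc_reduce_nowait( &my_loc, gtid, 1, sizeof(priv), &priv,
//                                   my_reduce_func, &my_lock ) ) {
//         case 1:  // critical/empty/tree method: this thread folds its (or the gathered) value in
//             shared_sum += priv;
//             __kmpc_end_reduce_nowait( &my_loc, gtid, &my_lock );
//             break;
//         case 2:  // atomic method: every thread updates atomically; no "end" call is emitted
//             shared_sum += priv;                          // e.g. via an atomic update
//             break;
//         default: // 0: tree method, non-master thread -- its value was already gathered
//             break;
//     }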
2834
2835/* 2.a.ii. Reduce Block with a terminating barrier */
2836
2837/*!
2838@ingroup SYNCHRONIZATION
2839@param loc source location information
2840@param global_tid global thread number
2841@param num_vars number of items (variables) to be reduced
2842@param reduce_size size of data in bytes to be reduced
2843@param reduce_data pointer to data to be reduced
2844@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2845@param lck pointer to the unique lock data structure
2846@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2847
2848A blocking reduce that includes an implicit barrier.
2849*/
2850kmp_int32
2851__kmpc_reduce(
2852 ident_t *loc, kmp_int32 global_tid,
2853 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2854 void (*reduce_func)(void *lhs_data, void *rhs_data),
2855 kmp_critical_name *lck )
2856{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002857 KMP_COUNT_BLOCK(REDUCE_wait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002858 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002859 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2860
2861 KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2862
2863 // why do we need this initialization here at all?
2864 // A reduction clause cannot be a stand-alone directive.
2865
2866 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2867 // possible detection of false-positive race by the threadchecker ???
2868 if( ! TCR_4( __kmp_init_parallel ) )
2869 __kmp_parallel_initialize();
2870
2871 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002872#if KMP_USE_DYNAMIC_LOCK
2873 if ( __kmp_env_consistency_check )
2874 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2875#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002876 if ( __kmp_env_consistency_check )
2877 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002878#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002879
Jim Cownie5e8470a2013-09-27 10:38:44 +00002880 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2881 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2882
2883 if( packed_reduction_method == critical_reduce_block ) {
2884
2885 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2886 retval = 1;
2887
2888 } else if( packed_reduction_method == empty_reduce_block ) {
2889
2890 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2891 retval = 1;
2892
2893 } else if( packed_reduction_method == atomic_reduce_block ) {
2894
2895 retval = 2;
2896
2897 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2898
2899 //case tree_reduce_block:
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002900 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002901 // (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002902#if USE_ITT_NOTIFY
2903 __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2904#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002905 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2906 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2907
2908 // all workers except the master should do this pop here
2909 // ( no worker other than the master will enter __kmpc_end_reduce() )
2910 if ( __kmp_env_consistency_check ) {
2911 if( retval == 0 ) { // 0: all other workers; 1: master
2912 __kmp_pop_sync( global_tid, ct_reduce, loc );
2913 }
2914 }
2915
2916 } else {
2917
2918 // should never reach this block
2919 KMP_ASSERT( 0 ); // "unexpected method"
2920
2921 }
2922
2923 KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2924
2925 return retval;
2926}
2927
2928/*!
2929@ingroup SYNCHRONIZATION
2930@param loc source location information
2931@param global_tid global thread id.
2932@param lck pointer to the unique lock data structure
2933
2934Finish the execution of a blocking reduce.
2935The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
2936*/
2937void
2938__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2939
2940 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2941
2942 KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2943
2944 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2945
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002946 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002947 // (it's a terminating barrier on constructs if NOWAIT not specified)
2948
2949 if( packed_reduction_method == critical_reduce_block ) {
2950
2951 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2952
2953 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002954#if USE_ITT_NOTIFY
2955 __kmp_threads[global_tid]->th.th_ident = loc;
2956#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002957 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2958
2959 } else if( packed_reduction_method == empty_reduce_block ) {
2960
2961 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2962
2963 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002964#if USE_ITT_NOTIFY
2965 __kmp_threads[global_tid]->th.th_ident = loc;
2966#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002967 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2968
2969 } else if( packed_reduction_method == atomic_reduce_block ) {
2970
2971 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002972#if USE_ITT_NOTIFY
2973 __kmp_threads[global_tid]->th.th_ident = loc;
2974#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002975 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2976
2977 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2978
2979 // only master executes here (master releases all other workers)
2980 __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2981
2982 } else {
2983
2984 // should never reach this block
2985 KMP_ASSERT( 0 ); // "unexpected method"
2986
2987 }
2988
2989 if ( __kmp_env_consistency_check )
2990 __kmp_pop_sync( global_tid, ct_reduce, loc );
2991
2992 KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2993
2994 return;
2995}
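
// Illustrative sketch with the same hypothetical names as the nowait example
// above: the blocking form is lowered the same way, except that the runtime
// itself provides the terminating barrier inside __kmpc_reduce()/__kmpc_end_reduce(),
// and (in this sketch) __kmpc_end_reduce() is also called for the atomic case
// so that every thread reaches that barrier.
//
//     switch( __kmpc_reduce( &my_loc, gtid, 1, sizeof(priv), &priv,
//                            my_reduce_func, &my_lock ) ) {
//         case 1:
//             shared_sum += priv;
//             __kmpc_end_reduce( &my_loc, gtid, &my_lock );
//             break;
//         case 2:
//             shared_sum += priv;                          // atomic update
//             __kmpc_end_reduce( &my_loc, gtid, &my_lock );
//             break;
//         default: // 0: tree method, non-master; released by the master in __kmpc_end_reduce()
//             break;
//     }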
2996
2997#undef __KMP_GET_REDUCTION_METHOD
2998#undef __KMP_SET_REDUCTION_METHOD
2999
3000/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
3001
3002kmp_uint64
3003__kmpc_get_taskid() {
3004
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003005 kmp_int32 gtid;
3006 kmp_info_t * thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003007
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003008 gtid = __kmp_get_gtid();
3009 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003010 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003011 }; // if
3012 thread = __kmp_thread_from_gtid( gtid );
3013 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003014
3015} // __kmpc_get_taskid
3016
3017
3018kmp_uint64
3019__kmpc_get_parent_taskid() {
3020
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003021 kmp_int32 gtid;
3022 kmp_info_t * thread;
3023 kmp_taskdata_t * parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003024
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003025 gtid = __kmp_get_gtid();
3026 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003027 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003028 }; // if
3029 thread = __kmp_thread_from_gtid( gtid );
3030 parent_task = thread->th.th_current_task->td_parent;
3031 return ( parent_task == NULL ? 0 : parent_task->td_task_id );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003032
3033} // __kmpc_get_parent_taskid
3034
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003035void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003036{
Jim Cownie5e8470a2013-09-27 10:38:44 +00003037 if ( ! __kmp_init_serial ) {
3038 __kmp_serial_initialize();
3039 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003040 __kmp_place_num_sockets = nS;
3041 __kmp_place_socket_offset = sO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003042 __kmp_place_num_cores = nC;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003043 __kmp_place_core_offset = cO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003044 __kmp_place_num_threads_per_core = nT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003045}
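
// Illustrative sketch (hypothetical values, not part of the runtime): a tool
// or compiler could restrict the resource set before the library starts its
// threads, roughly the programmatic equivalent of KMP_PLACE_THREADS=2s,4c,2t --
// 2 sockets and 4 cores per socket starting at offset 0, 2 threads per core:
//
//     __kmpc_place_threads( 2 /*nS*/, 0 /*sO*/, 4 /*nC*/, 0 /*cO*/, 2 /*nT*/ );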
3046
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003047#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00003048/*!
3049@ingroup WORK_SHARING
3050@param loc source location information.
3051@param gtid global thread number.
3052@param num_dims number of associated doacross loops.
3053@param dims info on loops bounds.
3054
3055Initialize doacross loop information.
3056 The compiler is expected to send us inclusive bounds,
3057e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
3058*/
3059void
3060__kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, struct kmp_dim * dims)
3061{
3062 int j, idx;
3063 kmp_int64 last, trace_count;
3064 kmp_info_t *th = __kmp_threads[gtid];
3065 kmp_team_t *team = th->th.th_team;
3066 kmp_uint32 *flags;
3067 kmp_disp_t *pr_buf = th->th.th_dispatch;
3068 dispatch_shared_info_t *sh_buf;
3069
3070 KA_TRACE(20,("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3071 gtid, num_dims, !team->t.t_serialized));
3072 KMP_DEBUG_ASSERT(dims != NULL);
3073 KMP_DEBUG_ASSERT(num_dims > 0);
3074
3075 if( team->t.t_serialized ) {
3076 KA_TRACE(20,("__kmpc_doacross_init() exit: serialized team\n"));
3077 return; // no dependencies if team is serialized
3078 }
3079 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3080 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for the next loop
Jonathan Peyton067325f2016-05-31 19:01:15 +00003081 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003082
3083 // Save bounds info into allocated private buffer
3084 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3085 pr_buf->th_doacross_info =
3086 (kmp_int64*)__kmp_thread_malloc(th, sizeof(kmp_int64)*(4 * num_dims + 1));
3087 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3088 pr_buf->th_doacross_info[0] = (kmp_int64)num_dims; // first element is number of dimensions
3089 // Also save the address of num_done so it can be accessed later without knowing the buffer index
3090 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3091 pr_buf->th_doacross_info[2] = dims[0].lo;
3092 pr_buf->th_doacross_info[3] = dims[0].up;
3093 pr_buf->th_doacross_info[4] = dims[0].st;
3094 last = 5;
3095 for( j = 1; j < num_dims; ++j ) {
3096 kmp_int64 range_length; // To keep ranges of all dimensions but the first dims[0]
3097 if( dims[j].st == 1 ) { // most common case
3098 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3099 range_length = dims[j].up - dims[j].lo + 1;
3100 } else {
3101 if( dims[j].st > 0 ) {
3102 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3103 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3104 } else { // negative increment
3105 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3106 range_length = (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3107 }
3108 }
3109 pr_buf->th_doacross_info[last++] = range_length;
3110 pr_buf->th_doacross_info[last++] = dims[j].lo;
3111 pr_buf->th_doacross_info[last++] = dims[j].up;
3112 pr_buf->th_doacross_info[last++] = dims[j].st;
3113 }
3114
3115 // Compute total trip count.
3116 // Start with range of dims[0] which we don't need to keep in the buffer.
3117 if( dims[0].st == 1 ) { // most common case
3118 trace_count = dims[0].up - dims[0].lo + 1;
3119 } else if( dims[0].st > 0 ) {
3120 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3121 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3122 } else { // negative increment
3123 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3124 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3125 }
3126 for( j = 1; j < num_dims; ++j ) {
3127 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3128 }
3129 KMP_DEBUG_ASSERT(trace_count > 0);
3130
Jonathan Peyton067325f2016-05-31 19:01:15 +00003131 // Check that the shared buffer is not occupied by another loop (i.e. the one with buffer index idx - __kmp_dispatch_num_buffers)
Jonathan Peyton71909c52016-03-02 22:42:06 +00003132 if( idx != sh_buf->doacross_buf_idx ) {
3133 // Shared buffer is occupied, wait for it to be free
3134 __kmp_wait_yield_4( (kmp_uint32*)&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, NULL );
3135 }
3136 // Check if we are the first thread. After the CAS the first thread gets 0,
3137 // others get 1 if initialization is in progress, allocated pointer otherwise.
3138 flags = (kmp_uint32*)KMP_COMPARE_AND_STORE_RET64(
3139 (kmp_int64*)&sh_buf->doacross_flags,NULL,(kmp_int64)1);
3140 if( flags == NULL ) {
3141 // we are the first thread, allocate the array of flags
3142 kmp_int64 size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3143 sh_buf->doacross_flags = (kmp_uint32*)__kmp_thread_calloc(th, size, 1);
3144 } else if( (kmp_int64)flags == 1 ) {
3145 // initialization is still in progress, need to wait
3146 while( (volatile kmp_int64)sh_buf->doacross_flags == 1 ) {
3147 KMP_YIELD(TRUE);
3148 }
3149 }
3150 KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > 1); // check value of pointer
3151 pr_buf->th_doacross_flags = sh_buf->doacross_flags; // save private copy in order to not
3152 // touch shared buffer on each iteration
3153 KA_TRACE(20,("__kmpc_doacross_init() exit: T#%d\n", gtid));
3154}
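
// Illustrative worked example (hypothetical loop nest, not part of the runtime):
// for a doacross nest such as
//     #pragma omp for ordered(2)
//     for( i = 2; i < 9; i += 2 )        // dims[0]: lo=2, up=8, st=2 -> 4 iterations
//         for( j = 0; j < 5; j++ )       // dims[1]: lo=0, up=4, st=1 -> 5 iterations
// the private buffer built above holds
//     th_doacross_info = { 2, &sh_buf->doacross_num_done, 2, 8, 2, 5, 0, 4, 1 }
// (num_dims, address of num_done, lo/up/st of dim 0, then range/lo/up/st of dim 1),
// trace_count = 4 * 5 = 20, and __kmpc_doacross_wait/post linearize iteration
// (i=6, j=3) as ((6-2)/2)*5 + (3-0)/1 = 13, i.e. bit 13%32 of word 13>>5 == 0
// in sh_buf->doacross_flags.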
3155
3156void
3157__kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec)
3158{
3159 kmp_int32 shft, num_dims, i;
3160 kmp_uint32 flag;
3161 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3162 kmp_info_t *th = __kmp_threads[gtid];
3163 kmp_team_t *team = th->th.th_team;
3164 kmp_disp_t *pr_buf;
3165 kmp_int64 lo, up, st;
3166
3167 KA_TRACE(20,("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3168 if( team->t.t_serialized ) {
3169 KA_TRACE(20,("__kmpc_doacross_wait() exit: serialized team\n"));
3170 return; // no dependencies if team is serialized
3171 }
3172
3173 // calculate sequential iteration number and check out-of-bounds condition
3174 pr_buf = th->th.th_dispatch;
3175 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3176 num_dims = pr_buf->th_doacross_info[0];
3177 lo = pr_buf->th_doacross_info[2];
3178 up = pr_buf->th_doacross_info[3];
3179 st = pr_buf->th_doacross_info[4];
3180 if( st == 1 ) { // most common case
3181 if( vec[0] < lo || vec[0] > up ) {
3182 KA_TRACE(20,(
3183 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3184 gtid, vec[0], lo, up));
3185 return;
3186 }
3187 iter_number = vec[0] - lo;
3188 } else if( st > 0 ) {
3189 if( vec[0] < lo || vec[0] > up ) {
3190 KA_TRACE(20,(
3191 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3192 gtid, vec[0], lo, up));
3193 return;
3194 }
3195 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3196 } else { // negative increment
3197 if( vec[0] > lo || vec[0] < up ) {
3198 KA_TRACE(20,(
3199 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3200 gtid, vec[0], lo, up));
3201 return;
3202 }
3203 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3204 }
3205 for( i = 1; i < num_dims; ++i ) {
3206 kmp_int64 iter, ln;
3207 kmp_int32 j = i * 4;
3208 ln = pr_buf->th_doacross_info[j + 1];
3209 lo = pr_buf->th_doacross_info[j + 2];
3210 up = pr_buf->th_doacross_info[j + 3];
3211 st = pr_buf->th_doacross_info[j + 4];
3212 if( st == 1 ) {
3213 if( vec[i] < lo || vec[i] > up ) {
3214 KA_TRACE(20,(
3215 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3216 gtid, vec[i], lo, up));
3217 return;
3218 }
3219 iter = vec[i] - lo;
3220 } else if( st > 0 ) {
3221 if( vec[i] < lo || vec[i] > up ) {
3222 KA_TRACE(20,(
3223 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3224 gtid, vec[i], lo, up));
3225 return;
3226 }
3227 iter = (kmp_uint64)(vec[i] - lo) / st;
3228 } else { // st < 0
3229 if( vec[i] > lo || vec[i] < up ) {
3230 KA_TRACE(20,(
3231 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3232 gtid, vec[i], lo, up));
3233 return;
3234 }
3235 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3236 }
3237 iter_number = iter + ln * iter_number;
3238 }
3239 shft = iter_number % 32; // use 32-bit granularity
3240 iter_number >>= 5; // divided by 32
3241 flag = 1 << shft;
3242 while( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 ) {
3243 KMP_YIELD(TRUE);
3244 }
3245 KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
3246 gtid, (iter_number<<5)+shft));
3247}
3248
3249void
3250__kmpc_doacross_post(ident_t *loc, int gtid, long long *vec)
3251{
3252 kmp_int32 shft, num_dims, i;
3253 kmp_uint32 flag;
3254 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3255 kmp_info_t *th = __kmp_threads[gtid];
3256 kmp_team_t *team = th->th.th_team;
3257 kmp_disp_t *pr_buf;
3258 kmp_int64 lo, st;
3259
3260 KA_TRACE(20,("__kmpc_doacross_post() enter: called T#%d\n", gtid));
3261 if( team->t.t_serialized ) {
3262 KA_TRACE(20,("__kmpc_doacross_post() exit: serialized team\n"));
3263 return; // no dependencies if team is serialized
3264 }
3265
3266 // calculate sequential iteration number (same as in "wait" but no out-of-bounds checks)
3267 pr_buf = th->th.th_dispatch;
3268 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3269 num_dims = pr_buf->th_doacross_info[0];
3270 lo = pr_buf->th_doacross_info[2];
3271 st = pr_buf->th_doacross_info[4];
3272 if( st == 1 ) { // most common case
3273 iter_number = vec[0] - lo;
3274 } else if( st > 0 ) {
3275 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3276 } else { // negative increment
3277 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3278 }
3279 for( i = 1; i < num_dims; ++i ) {
3280 kmp_int64 iter, ln;
3281 kmp_int32 j = i * 4;
3282 ln = pr_buf->th_doacross_info[j + 1];
3283 lo = pr_buf->th_doacross_info[j + 2];
3284 st = pr_buf->th_doacross_info[j + 4];
3285 if( st == 1 ) {
3286 iter = vec[i] - lo;
3287 } else if( st > 0 ) {
3288 iter = (kmp_uint64)(vec[i] - lo) / st;
3289 } else { // st < 0
3290 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3291 }
3292 iter_number = iter + ln * iter_number;
3293 }
3294 shft = iter_number % 32; // use 32-bit granularity
3295 iter_number >>= 5; // divided by 32
3296 flag = 1 << shft;
3297 if( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 )
3298 KMP_TEST_THEN_OR32( (kmp_int32*)&pr_buf->th_doacross_flags[iter_number], (kmp_int32)flag );
3299 KA_TRACE(20,("__kmpc_doacross_post() exit: T#%d iter %lld posted\n",
3300 gtid, (iter_number<<5)+shft));
3301}
3302
3303void
3304__kmpc_doacross_fini(ident_t *loc, int gtid)
3305{
3306 kmp_int64 num_done;
3307 kmp_info_t *th = __kmp_threads[gtid];
3308 kmp_team_t *team = th->th.th_team;
3309 kmp_disp_t *pr_buf = th->th.th_dispatch;
3310
3311 KA_TRACE(20,("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
3312 if( team->t.t_serialized ) {
3313 KA_TRACE(20,("__kmpc_doacross_fini() exit: serialized team %p\n", team));
3314 return; // nothing to do
3315 }
3316 num_done = KMP_TEST_THEN_INC64((kmp_int64*)pr_buf->th_doacross_info[1]) + 1;
3317 if( num_done == th->th.th_team_nproc ) {
3318 // we are the last thread, need to free shared resources
3319 int idx = pr_buf->th_doacross_buf_idx - 1;
Jonathan Peyton067325f2016-05-31 19:01:15 +00003320 dispatch_shared_info_t *sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003321 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done);
3322 KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done);
3323 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
3324 __kmp_thread_free(th, (void*)sh_buf->doacross_flags);
3325 sh_buf->doacross_flags = NULL;
3326 sh_buf->doacross_num_done = 0;
Jonathan Peyton067325f2016-05-31 19:01:15 +00003327 sh_buf->doacross_buf_idx += __kmp_dispatch_num_buffers; // free buffer for future re-use
Jonathan Peyton71909c52016-03-02 22:42:06 +00003328 }
3329 // free private resources (need to keep buffer index forever)
3330 __kmp_thread_free(th, (void*)pr_buf->th_doacross_info);
3331 pr_buf->th_doacross_info = NULL;
3332 KA_TRACE(20,("__kmpc_doacross_fini() exit: T#%d\n", gtid));
3333}
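
// Illustrative sketch (hypothetical names; not the exact code any particular
// compiler emits): how the four doacross entry points fit together for
//     #pragma omp for ordered(1)
//     for( i = 0; i < n; i++ ) {
//         #pragma omp ordered depend(sink: i-1)
//         ... use data produced by iteration i-1 ...
//         #pragma omp ordered depend(source)
//     }
//
//     struct kmp_dim dims = { 0, n - 1, 1 };                // inclusive bounds lo=0, up=n-1, st=1 (assuming that field order)
//     __kmpc_doacross_init( &my_loc, gtid, 1, &dims );
//     for( /* each iteration i assigned to this thread by the loop schedule */ ) {
//         long long vec = i - 1;
//         __kmpc_doacross_wait( &my_loc, gtid, &vec );      // depend(sink: i-1); out-of-range sinks return immediately
//         ... loop body ...
//         vec = i;
//         __kmpc_doacross_post( &my_loc, gtid, &vec );      // depend(source)
//     }
//     __kmpc_doacross_fini( &my_loc, gtid );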
3334#endif
3335
Jim Cownie5e8470a2013-09-27 10:38:44 +00003336// end of file //
3337