Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
Jonathan Peytonde4749b2016-12-14 23:01:24 +00002 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "omp.h" /* extern "C" declarations of user-visible routines */
17#include "kmp.h"
18#include "kmp_i18n.h"
19#include "kmp_itt.h"
Paul Osmialowskifb043fd2016-05-16 09:44:11 +000020#include "kmp_lock.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000021#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000022#include "kmp_stats.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000023
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000024#if OMPT_SUPPORT
25#include "ompt-internal.h"
26#include "ompt-specific.h"
27#endif
28
Jim Cownie5e8470a2013-09-27 10:38:44 +000029#define MAX_MESSAGE 512
30
31/* ------------------------------------------------------------------------ */
32/* ------------------------------------------------------------------------ */
33
34/* flags will be used in future, e.g., to implement */
35/* openmp_strict library restrictions */
36
37/*!
38 * @ingroup STARTUP_SHUTDOWN
39 * @param loc in source location information
40 * @param flags in for future use (currently ignored)
41 *
42 * Initialize the runtime library. This call is optional; if it is not made then
Jim Cownie4cc4bb42014-10-07 16:25:50 +000043 * it will be implicitly called by attempts to use other library functions.
Jim Cownie5e8470a2013-09-27 10:38:44 +000044 *
45 */
46void
47__kmpc_begin(ident_t *loc, kmp_int32 flags)
48{
Andrey Churbanovad3f6392017-02-16 17:08:40 +000049 // By default __kmpc_begin() is no-op.
50 char *env;
51 if ((env = getenv( "KMP_INITIAL_THREAD_BIND" )) != NULL &&
52 __kmp_str_match_true( env )) {
53 __kmp_middle_initialize();
54 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n" ));
55 } else if (__kmp_ignore_mppbeg() == FALSE) {
56 // By default __kmp_ignore_mppbeg() returns TRUE.
Jim Cownie5e8470a2013-09-27 10:38:44 +000057 __kmp_internal_begin();
Jim Cownie5e8470a2013-09-27 10:38:44 +000058 KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
59 }
60}
61
62/*!
63 * @ingroup STARTUP_SHUTDOWN
64 * @param loc source location information
65 *
66 * Shut down the runtime library. This is also optional, and even if called it will not
67 * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
68 */
69void
70__kmpc_end(ident_t *loc)
71{
72 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() call no-op.
73 // However, this can be overridden with KMP_IGNORE_MPPEND environment variable.
74 // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
75 // will unregister this root (it can cause library shut down).
76 if (__kmp_ignore_mppend() == FALSE) {
77 KC_TRACE( 10, ("__kmpc_end: called\n" ) );
78 KA_TRACE( 30, ("__kmpc_end\n" ));
79
80 __kmp_internal_end_thread( -1 );
81 }
82}
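/*
 * Illustrative sketch only (not the output of any particular compiler): how a
 * program might bracket its lifetime with the two optional entry points above.
 * The ident_t initializer assumes the usual kmp.h layout (reserved word, flags,
 * two more reserved words, psource) and the KMP_IDENT_KMPC flag; treat both as
 * assumptions. The loc argument is only traced here, so a trivial value suffices.
 *
 *     static ident_t dummy_loc = { 0, KMP_IDENT_KMPC, 0, 0, ";unknown;main;0;0;;" };
 *
 *     int main() {
 *         __kmpc_begin(&dummy_loc, 0);  // no-op unless KMP_INITIAL_THREAD_BIND
 *                                       // is set or KMP_IGNORE_MPPBEG is 0
 *         // ... code containing OpenMP constructs ...
 *         __kmpc_end(&dummy_loc);       // unregisters this root only when
 *                                       // KMP_IGNORE_MPPEND is 0
 *         return 0;
 *     }
 */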
83
84/*!
85@ingroup THREAD_STATES
86@param loc Source location information.
87@return The global thread index of the active thread.
88
89This function can be called in any context.
90
91If the runtime has only been entered at the outermost level from a
92single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
Jonathan Peyton81f9cd12015-05-22 22:37:22 +000093which would be returned by omp_get_thread_num() in the outermost
Jim Cownie5e8470a2013-09-27 10:38:44 +000094active parallel construct. (Or zero if there is no active parallel
95construct, since the master thread is necessarily thread zero).
96
97If multiple non-OpenMP threads all enter an OpenMP construct then this
98will be a unique thread identifier among all the threads created by
99the OpenMP runtime (but the value cannot be defined in terms of
100OpenMP thread ids returned by omp_get_thread_num()).
101
102*/
103kmp_int32
104__kmpc_global_thread_num(ident_t *loc)
105{
106 kmp_int32 gtid = __kmp_entry_gtid();
107
108 KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
109
110 return gtid;
111}
112
113/*!
114@ingroup THREAD_STATES
115@param loc Source location information.
116@return The number of threads under control of the OpenMP<sup>*</sup> runtime
117
118This function can be called in any context.
119It returns the total number of threads under the control of the OpenMP runtime. That is
120not a number that can be determined by any OpenMP standard calls, since the library may be
121called from more than one non-OpenMP thread, and this reflects the total over all such calls.
122Similarly the runtime maintains underlying threads even when they are not active (since the cost
123of creating and destroying OS threads is high), this call counts all such threads even if they are not
124waiting for work.
125*/
126kmp_int32
127__kmpc_global_num_threads(ident_t *loc)
128{
Andrey Churbanov76d42852016-12-21 21:20:20 +0000129 KC_TRACE(10,("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000130
Andrey Churbanov76d42852016-12-21 21:20:20 +0000131 return TCR_4(__kmp_all_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000132}
133
134/*!
135@ingroup THREAD_STATES
136@param loc Source location information.
137@return The thread number of the calling thread in the innermost active parallel construct.
138
139*/
140kmp_int32
141__kmpc_bound_thread_num(ident_t *loc)
142{
143 KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
144 return __kmp_tid_from_gtid( __kmp_entry_gtid() );
145}
146
147/*!
148@ingroup THREAD_STATES
149@param loc Source location information.
150@return The number of threads in the innermost active parallel construct.
151*/
152kmp_int32
153__kmpc_bound_num_threads(ident_t *loc)
154{
155 KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
156
157 return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
158}
159
160/*!
161 * @ingroup DEPRECATED
162 * @param loc location description
163 *
164 * This function need not be called. It always returns TRUE.
165 */
166kmp_int32
167__kmpc_ok_to_fork(ident_t *loc)
168{
169#ifndef KMP_DEBUG
170
171 return TRUE;
172
173#else
174
175 const char *semi2;
176 const char *semi3;
177 int line_no;
178
179 if (__kmp_par_range == 0) {
180 return TRUE;
181 }
182 semi2 = loc->psource;
183 if (semi2 == NULL) {
184 return TRUE;
185 }
186 semi2 = strchr(semi2, ';');
187 if (semi2 == NULL) {
188 return TRUE;
189 }
190 semi2 = strchr(semi2 + 1, ';');
191 if (semi2 == NULL) {
192 return TRUE;
193 }
194 if (__kmp_par_range_filename[0]) {
195 const char *name = semi2 - 1;
196 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
197 name--;
198 }
199 if ((*name == '/') || (*name == ';')) {
200 name++;
201 }
202 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
203 return __kmp_par_range < 0;
204 }
205 }
206 semi3 = strchr(semi2 + 1, ';');
207 if (__kmp_par_range_routine[0]) {
208 if ((semi3 != NULL) && (semi3 > semi2)
209 && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
210 return __kmp_par_range < 0;
211 }
212 }
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000213 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000214 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
215 return __kmp_par_range > 0;
216 }
217 return __kmp_par_range < 0;
218 }
219 return TRUE;
220
221#endif /* KMP_DEBUG */
222
223}
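/*
 * For reference, a sketch of the loc->psource layout that the scanning above
 * assumes (inferred from the code, not a formal specification): a leading
 * semicolon, then the file name, the routine name, and a line number, all
 * semicolon separated, e.g.
 *
 *     static ident_t loc = { 0, KMP_IDENT_KMPC, 0, 0, ";foo.c;compute;42;57;;" };
 *
 * When the parallel-range filter is active, the file name is matched against
 * __kmp_par_range_filename, the routine name against __kmp_par_range_routine,
 * and the line number against [__kmp_par_range_lb, __kmp_par_range_ub] to
 * decide whether the enclosing parallel region is allowed to fork.
 */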
224
225/*!
226@ingroup THREAD_STATES
227@param loc Source location information.
228@return 1 if this thread is executing inside an active parallel region, zero if not.
229*/
230kmp_int32
231__kmpc_in_parallel( ident_t *loc )
232{
233 return __kmp_entry_thread() -> th.th_root -> r.r_active;
234}
235
236/*!
237@ingroup PARALLEL
238@param loc source location information
239@param global_tid global thread number
240@param num_threads number of threads requested for this parallel construct
241
242Set the number of threads to be used by the next fork spawned by this thread.
243This call is only required if the parallel construct has a `num_threads` clause.
244*/
245void
246__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
247{
248 KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
249 global_tid, num_threads ) );
250
251 __kmp_push_num_threads( loc, global_tid, num_threads );
252}
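/*
 * Hypothetical lowering sketch (names are illustrative, &loc is the
 * construct's ident_t): a num_threads clause becomes a push issued
 * immediately before the fork.
 *
 *     // #pragma omp parallel num_threads(4)
 *     // { work(); }
 *     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *     __kmpc_push_num_threads(&loc, gtid, 4);
 *     __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_work);
 *
 * The pushed value applies only to the next fork issued by this thread and is
 * popped automatically (see __kmpc_pop_num_threads below).
 */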
253
254void
255__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
256{
257 KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
258
259 /* the num_threads are automatically popped */
260}
261
262
263#if OMP_40_ENABLED
264
265void
266__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
267{
268 KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
269 global_tid, proc_bind ) );
270
271 __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
272}
273
274#endif /* OMP_40_ENABLED */
275
276
277/*!
278@ingroup PARALLEL
279@param loc source location information
280@param argc total number of arguments in the ellipsis
281@param microtask pointer to callback routine consisting of outlined parallel construct
282@param ... pointers to shared variables that aren't global
283
284Do the actual fork and call the microtask in the relevant number of threads.
285*/
286void
287__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
288{
289 int gtid = __kmp_entry_gtid();
Jonathan Peyton45be4502015-08-11 21:36:41 +0000290
Jonathan Peyton61118492016-05-20 19:03:38 +0000291#if (KMP_STATS_ENABLED)
Jonathan Peyton45be4502015-08-11 21:36:41 +0000292 int inParallel = __kmpc_in_parallel(loc);
293 if (inParallel)
294 {
295 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
296 }
297 else
298 {
Jonathan Peyton45be4502015-08-11 21:36:41 +0000299 KMP_COUNT_BLOCK(OMP_PARALLEL);
300 }
301#endif
302
Jim Cownie5e8470a2013-09-27 10:38:44 +0000303 // maybe to save thr_state is enough here
304 {
305 va_list ap;
306 va_start( ap, microtask );
307
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000308#if OMPT_SUPPORT
Jonas Hahnfelddd9a05d2016-09-14 13:59:31 +0000309 ompt_frame_t* ompt_frame;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000310 if (ompt_enabled) {
Jonas Hahnfelddd9a05d2016-09-14 13:59:31 +0000311 kmp_info_t *master_th = __kmp_threads[ gtid ];
312 kmp_team_t *parent_team = master_th->th.th_team;
313 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
314 if (lwt)
315 ompt_frame = &(lwt->ompt_task_info.frame);
316 else
317 {
318 int tid = __kmp_tid_from_gtid( gtid );
319 ompt_frame = &(parent_team->t.t_implicit_task_taskdata[tid].
320 ompt_task_info.frame);
321 }
322 ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000323 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000324#endif
325
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000326#if INCLUDE_SSC_MARKS
327 SSC_MARK_FORKING();
328#endif
329 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000330 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000331#if OMPT_SUPPORT
332 VOLATILE_CAST(void *) microtask, // "unwrapped" task
333#endif
334 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000335 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
336/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000337#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000338 &ap
339#else
340 ap
341#endif
342 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000343#if INCLUDE_SSC_MARKS
344 SSC_MARK_JOINING();
345#endif
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +0000346 __kmp_join_call( loc, gtid
347#if OMPT_SUPPORT
348 , fork_context_intel
349#endif
350 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000351
352 va_end( ap );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000353
Jim Cownie5e8470a2013-09-27 10:38:44 +0000354 }
355}
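/*
 * Illustrative sketch of the fork protocol as seen from generated code; the
 * outlined function and its name are hypothetical. For
 *
 *     int a = 0;
 *     #pragma omp parallel shared(a)
 *     { use(&a); }
 *
 * a front end typically emits one outlined microtask plus one fork call:
 *
 *     static void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *a) {
 *         use(a);                        // region body, executed by every thread
 *     }
 *     ...
 *     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &a);
 *
 * argc counts only the trailing shared-variable pointers; gtid and bound_tid
 * are supplied by the runtime when it invokes the microtask.
 */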
356
357#if OMP_40_ENABLED
358/*!
359@ingroup PARALLEL
360@param loc source location information
361@param global_tid global thread number
362@param num_teams number of teams requested for the teams construct
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000363@param num_threads number of threads per team requested for the teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +0000364
365Set the number of teams to be used by the teams construct.
366This call is only required if the teams construct has a `num_teams` clause
367or a `thread_limit` clause (or both).
368*/
369void
370__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
371{
372 KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
373 global_tid, num_teams, num_threads ) );
374
375 __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
376}
377
378/*!
379@ingroup PARALLEL
380@param loc source location information
381@param argc total number of arguments in the ellipsis
382@param microtask pointer to callback routine consisting of outlined teams construct
383@param ... pointers to shared variables that aren't global
384
385Do the actual fork and call the microtask in the relevant number of threads.
386*/
387void
388__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
389{
390 int gtid = __kmp_entry_gtid();
391 kmp_info_t *this_thr = __kmp_threads[ gtid ];
392 va_list ap;
393 va_start( ap, microtask );
394
Jonathan Peyton45be4502015-08-11 21:36:41 +0000395 KMP_COUNT_BLOCK(OMP_TEAMS);
396
Jim Cownie5e8470a2013-09-27 10:38:44 +0000397 // remember teams entry point and nesting level
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000398 this_thr->th.th_teams_microtask = microtask;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000399 this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
400
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000401#if OMPT_SUPPORT
402 kmp_team_t *parent_team = this_thr->th.th_team;
403 int tid = __kmp_tid_from_gtid( gtid );
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000404 if (ompt_enabled) {
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000405 parent_team->t.t_implicit_task_taskdata[tid].
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +0000406 ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1);
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000407 }
408#endif
409
Jim Cownie5e8470a2013-09-27 10:38:44 +0000410 // check if __kmpc_push_num_teams called, set default number of teams otherwise
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000411 if ( this_thr->th.th_teams_size.nteams == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000412 __kmp_push_num_teams( loc, gtid, 0, 0 );
413 }
414 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000415 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
416 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000417
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000418 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000419 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000420#if OMPT_SUPPORT
421 VOLATILE_CAST(void *) microtask, // "unwrapped" task
422#endif
423 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000424 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000425#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000426 &ap
427#else
428 ap
429#endif
430 );
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +0000431 __kmp_join_call( loc, gtid
432#if OMPT_SUPPORT
433 , fork_context_intel
434#endif
435 );
Jonathan Peyton3fdf3292015-07-21 18:03:30 +0000436
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000437 this_thr->th.th_teams_microtask = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000438 this_thr->th.th_teams_level = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000439 *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000440 va_end( ap );
441}
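/*
 * Hypothetical lowering sketch for a teams construct using the two entry
 * points above (names are illustrative):
 *
 *     // #pragma omp teams num_teams(8) thread_limit(4)
 *     // { team_work(); }
 *     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *     __kmpc_push_num_teams(&loc, gtid, 8, 4);
 *     __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_team_work);
 *
 * As with __kmpc_fork_call, any shared-variable pointers follow the microtask
 * argument and are counted by argc.
 */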
442#endif /* OMP_40_ENABLED */
443
444
445//
446// I don't think this function should ever have been exported.
447// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
448// openmp code ever called it, but it's been exported from the RTL for so
449// long that I'm afraid to remove the definition.
450//
451int
452__kmpc_invoke_task_func( int gtid )
453{
454 return __kmp_invoke_task_func( gtid );
455}
456
457/*!
458@ingroup PARALLEL
459@param loc source location information
460@param global_tid global thread number
461
462Enter a serialized parallel construct. This interface is used to handle a
463conditional parallel region, like this,
464@code
465#pragma omp parallel if (condition)
466@endcode
467when the condition is false.
468*/
469void
470__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
471{
Jonathan Peytonde4749b2016-12-14 23:01:24 +0000472 // The implementation is now in kmp_runtime.cpp so that it can share static
473 // functions with kmp_fork_call since the tasks to be done are similar in
474 // each case.
475 __kmp_serialized_parallel(loc, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000476}
477
478/*!
479@ingroup PARALLEL
480@param loc source location information
481@param global_tid global thread number
482
483Leave a serialized parallel construct.
484*/
485void
486__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
487{
488 kmp_internal_control_t *top;
489 kmp_info_t *this_thr;
490 kmp_team_t *serial_team;
491
492 KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
493
494 /* skip all this code for autopar serialized loops since it results in
495 unacceptable overhead */
496 if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
497 return;
498
499 // Not autopar code
500 if( ! TCR_4( __kmp_init_parallel ) )
501 __kmp_parallel_initialize();
502
503 this_thr = __kmp_threads[ global_tid ];
504 serial_team = this_thr->th.th_serial_team;
505
Jonathan Peytondf6818b2016-06-14 17:57:47 +0000506 #if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000507 kmp_task_team_t * task_team = this_thr->th.th_task_team;
508
509 // we need to wait for the proxy tasks before finishing the thread
510 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
Jonathan Peyton7abf9d52016-05-26 18:19:10 +0000511 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL) ); // is an ITT object needed here?
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000512 #endif
513
Jim Cownie5e8470a2013-09-27 10:38:44 +0000514 KMP_MB();
515 KMP_DEBUG_ASSERT( serial_team );
516 KMP_ASSERT( serial_team -> t.t_serialized );
517 KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
518 KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
519 KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
520 KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
521
522 /* If necessary, pop the internal control stack values and replace the team values */
523 top = serial_team -> t.t_control_stack_top;
524 if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000525 copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000526 serial_team -> t.t_control_stack_top = top -> next;
527 __kmp_free(top);
528 }
529
Jim Cownie5e8470a2013-09-27 10:38:44 +0000530 //if( serial_team -> t.t_serialized > 1 )
531 serial_team -> t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000532
533 /* pop dispatch buffers stack */
534 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
535 {
536 dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
537 serial_team->t.t_dispatch->th_disp_buffer =
538 serial_team->t.t_dispatch->th_disp_buffer->next;
539 __kmp_free( disp_buffer );
540 }
541
542 -- serial_team -> t.t_serialized;
543 if ( serial_team -> t.t_serialized == 0 ) {
544
545 /* return to the parallel section */
546
547#if KMP_ARCH_X86 || KMP_ARCH_X86_64
548 if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
549 __kmp_clear_x87_fpu_status_word();
550 __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
551 __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
552 }
553#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
554
555 this_thr -> th.th_team = serial_team -> t.t_parent;
556 this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
557
558 /* restore values cached in the thread */
559 this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
560 this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
561 this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
562
563 /* TODO the below shouldn't need to be adjusted for serialized teams */
564 this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
565 t.t_dispatch[ serial_team -> t.t_master_tid ];
566
Jim Cownie5e8470a2013-09-27 10:38:44 +0000567 __kmp_pop_current_task_from_thread( this_thr );
568
569 KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
570 this_thr -> th.th_current_task -> td_flags.executing = 1;
571
572 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +0000573 // Copy the task team from the new child / old parent team to the thread.
574 this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000575 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
576 global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
577 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000578 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000579 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
580 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
581 global_tid, serial_team, serial_team -> t.t_serialized ) );
582 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000583 }
584
Jim Cownie5e8470a2013-09-27 10:38:44 +0000585 if ( __kmp_env_consistency_check )
586 __kmp_pop_parallel( global_tid, NULL );
587}
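/*
 * Hypothetical sketch of how the serialized pair is used to lower an if
 * clause (the outlined function is illustrative; compare the @code example in
 * the __kmpc_serialized_parallel documentation above):
 *
 *     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *     if (cond) {
 *         __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
 *     } else {
 *         __kmpc_serialized_parallel(&loc, gtid);
 *         kmp_int32 bound_tid = 0;
 *         outlined(&gtid, &bound_tid);   // body runs on the encountering thread
 *         __kmpc_end_serialized_parallel(&loc, gtid);
 *     }
 */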
588
589/*!
590@ingroup SYNCHRONIZATION
591@param loc source location information.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000592
Andrey Churbanov723a6b62015-02-20 18:09:27 +0000593Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
Jim Cownie5e8470a2013-09-27 10:38:44 +0000594depending on the memory ordering convention obeyed by the compiler
595even that may not be necessary).
596*/
597void
Andrey Churbanov723a6b62015-02-20 18:09:27 +0000598__kmpc_flush(ident_t *loc)
Jim Cownie5e8470a2013-09-27 10:38:44 +0000599{
600 KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
601
602 /* need explicit __mf() here since use volatile instead in library */
603 KMP_MB(); /* Flush all pending memory write invalidates. */
604
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000605 #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
606 #if KMP_MIC
607 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
608 // We shouldn't need it, though, since the ABI rules require that
609 // * If the compiler generates NGO stores it also generates the fence
610 // * If users hand-code NGO stores they should insert the fence
611 // therefore no incomplete unordered stores should be visible.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000612 #else
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000613 // C74404
614 // This is to address non-temporal store instructions (sfence needed).
615 // The clflush instruction is also addressed (mfence needed).
616 // Probably the non-temporal load movntdqa instruction should also be addressed.
617 // mfence is an SSE2 instruction. Do not execute it if the CPU does not support SSE2.
618 if ( ! __kmp_cpuinfo.initialized ) {
619 __kmp_query_cpuid( & __kmp_cpuinfo );
620 }; // if
621 if ( ! __kmp_cpuinfo.sse2 ) {
622 // CPU cannot execute SSE2 instructions.
623 } else {
Jonathan Peyton61118492016-05-20 19:03:38 +0000624 #if KMP_COMPILER_ICC
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000625 _mm_mfence();
Jonathan Peytonb7d30cb2016-03-23 16:27:25 +0000626 #elif KMP_COMPILER_MSVC
627 MemoryBarrier();
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000628 #else
629 __sync_synchronize();
630 #endif // KMP_COMPILER_ICC
631 }; // if
632 #endif // KMP_MIC
Sylvestre Ledrucd9d3742016-12-08 09:22:24 +0000633 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000634 // Nothing to see here move along
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000635 #elif KMP_ARCH_PPC64
636 // Nothing needed here (we have a real MB above).
637 #if KMP_OS_CNK
638 // The flushing thread needs to yield here; this prevents a
639 // busy-waiting thread from saturating the pipeline. flush is
640 // often used in loops like this:
641 // while (!flag) {
642 // #pragma omp flush(flag)
643 // }
644 // and adding the yield here is good for at least a 10x speedup
645 // when running >2 threads per core (on the NAS LU benchmark).
646 __kmp_yield(TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000647 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000648 #else
649 #error Unknown or unsupported architecture
650 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000651
652}
653
654/* -------------------------------------------------------------------------- */
655
656/* -------------------------------------------------------------------------- */
657
658/*!
659@ingroup SYNCHRONIZATION
660@param loc source location information
661@param global_tid thread id.
662
663Execute a barrier.
664*/
665void
666__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
667{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000668 KMP_COUNT_BLOCK(OMP_BARRIER);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000669 KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
670
671 if (! TCR_4(__kmp_init_parallel))
672 __kmp_parallel_initialize();
673
674 if ( __kmp_env_consistency_check ) {
675 if ( loc == 0 ) {
676 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
677 }; // if
678
679 __kmp_check_barrier( global_tid, ct_barrier, loc );
680 }
681
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +0000682#if OMPT_SUPPORT && OMPT_TRACE
683 ompt_frame_t * ompt_frame;
684 if (ompt_enabled ) {
Jonas Hahnfeld848d6902016-09-14 13:59:39 +0000685 ompt_frame = __ompt_get_task_frame_internal(0);
686 if ( ompt_frame->reenter_runtime_frame == NULL )
687 ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +0000688 }
689#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000690 __kmp_threads[ global_tid ]->th.th_ident = loc;
691 // TODO: explicit barrier_wait_id:
692 // this function is called when 'barrier' directive is present or
693 // implicit barrier at the end of a worksharing construct.
694 // 1) better to add a per-thread barrier counter to a thread data structure
695 // 2) set to 0 when a new team is created
696 // 4) no sync is required
697
698 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +0000699#if OMPT_SUPPORT && OMPT_TRACE
700 if (ompt_enabled ) {
701 ompt_frame->reenter_runtime_frame = NULL;
702 }
703#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000704}
705
706/* The BARRIER for a MASTER section is always explicit */
707/*!
708@ingroup WORK_SHARING
709@param loc source location information.
710@param global_tid global thread number.
711@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
712*/
713kmp_int32
714__kmpc_master(ident_t *loc, kmp_int32 global_tid)
715{
716 int status = 0;
717
718 KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
719
720 if( ! TCR_4( __kmp_init_parallel ) )
721 __kmp_parallel_initialize();
722
Jonathan Peyton45be4502015-08-11 21:36:41 +0000723 if( KMP_MASTER_GTID( global_tid )) {
Jonathan Peyton30138252016-03-03 21:21:05 +0000724 KMP_COUNT_BLOCK(OMP_MASTER);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +0000725 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000726 status = 1;
Jonathan Peyton45be4502015-08-11 21:36:41 +0000727 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000728
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000729#if OMPT_SUPPORT && OMPT_TRACE
730 if (status) {
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000731 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000732 ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
Jonathan Peyton122dd762015-07-13 18:55:45 +0000733 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
734 kmp_team_t *team = this_thr -> th.th_team;
735
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000736 int tid = __kmp_tid_from_gtid( global_tid );
737 ompt_callbacks.ompt_callback(ompt_event_master_begin)(
738 team->t.ompt_team_info.parallel_id,
739 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
740 }
741 }
742#endif
743
Jim Cownie5e8470a2013-09-27 10:38:44 +0000744 if ( __kmp_env_consistency_check ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000745#if KMP_USE_DYNAMIC_LOCK
746 if (status)
747 __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
748 else
749 __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
750#else
Jim Cownie5e8470a2013-09-27 10:38:44 +0000751 if (status)
752 __kmp_push_sync( global_tid, ct_master, loc, NULL );
753 else
754 __kmp_check_sync( global_tid, ct_master, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000755#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000756 }
757
758 return status;
759}
760
761/*!
762@ingroup WORK_SHARING
763@param loc source location information.
764@param global_tid global thread number.
765
766Mark the end of a <tt>master</tt> region. This should only be called by the thread
767that executes the <tt>master</tt> region.
768*/
769void
770__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
771{
772 KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
773
774 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
Jonathan Peyton11dc82f2016-05-05 16:15:57 +0000775 KMP_POP_PARTITIONED_TIMER();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000776
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000777#if OMPT_SUPPORT && OMPT_TRACE
778 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
779 kmp_team_t *team = this_thr -> th.th_team;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000780 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000781 ompt_callbacks.ompt_callback(ompt_event_master_end)) {
782 int tid = __kmp_tid_from_gtid( global_tid );
783 ompt_callbacks.ompt_callback(ompt_event_master_end)(
784 team->t.ompt_team_info.parallel_id,
785 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
786 }
787#endif
788
Jim Cownie5e8470a2013-09-27 10:38:44 +0000789 if ( __kmp_env_consistency_check ) {
790 if( global_tid < 0 )
791 KMP_WARNING( ThreadIdentInvalid );
792
793 if( KMP_MASTER_GTID( global_tid ))
794 __kmp_pop_sync( global_tid, ct_master, loc );
795 }
796}
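/*
 * Hypothetical lowering sketch for a master region using the pair above
 * (gtid obtained from __kmpc_global_thread_num):
 *
 *     if (__kmpc_master(&loc, gtid)) {
 *         master_only_work();
 *         __kmpc_end_master(&loc, gtid);
 *     }
 *     // master has no implied barrier; the compiler emits __kmpc_barrier()
 *     // separately only where the program requires one
 */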
797
798/*!
799@ingroup WORK_SHARING
800@param loc source location information.
801@param gtid global thread number.
802
803Start execution of an <tt>ordered</tt> construct.
804*/
805void
806__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
807{
808 int cid = 0;
809 kmp_info_t *th;
810 KMP_DEBUG_ASSERT( __kmp_init_serial );
811
812 KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
813
814 if (! TCR_4(__kmp_init_parallel))
815 __kmp_parallel_initialize();
816
817#if USE_ITT_BUILD
818 __kmp_itt_ordered_prep( gtid );
819 // TODO: ordered_wait_id
820#endif /* USE_ITT_BUILD */
821
822 th = __kmp_threads[ gtid ];
823
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000824#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000825 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000826 /* OMPT state update */
827 th->th.ompt_thread_info.wait_id = (uint64_t) loc;
828 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
829
830 /* OMPT event callback */
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000831 if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000832 ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
833 th->th.ompt_thread_info.wait_id);
834 }
835 }
836#endif
837
Jim Cownie5e8470a2013-09-27 10:38:44 +0000838 if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
839 (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
840 else
841 __kmp_parallel_deo( & gtid, & cid, loc );
842
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000843#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000844 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000845 /* OMPT state update */
846 th->th.ompt_thread_info.state = ompt_state_work_parallel;
847 th->th.ompt_thread_info.wait_id = 0;
848
849 /* OMPT event callback */
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000850 if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000851 ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
852 th->th.ompt_thread_info.wait_id);
853 }
854 }
855#endif
856
Jim Cownie5e8470a2013-09-27 10:38:44 +0000857#if USE_ITT_BUILD
858 __kmp_itt_ordered_start( gtid );
859#endif /* USE_ITT_BUILD */
860}
861
862/*!
863@ingroup WORK_SHARING
864@param loc source location information.
865@param gtid global thread number.
866
867End execution of an <tt>ordered</tt> construct.
868*/
869void
870__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
871{
872 int cid = 0;
873 kmp_info_t *th;
874
875 KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
876
877#if USE_ITT_BUILD
878 __kmp_itt_ordered_end( gtid );
879 // TODO: ordered_wait_id
880#endif /* USE_ITT_BUILD */
881
882 th = __kmp_threads[ gtid ];
883
884 if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
885 (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
886 else
887 __kmp_parallel_dxo( & gtid, & cid, loc );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000888
889#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000890 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000891 ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
892 ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
893 th->th.ompt_thread_info.wait_id);
894 }
895#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000896}
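/*
 * Hypothetical lowering sketch for an ordered region inside an ordered
 * worksharing loop (loop bookkeeping omitted; gtid obtained from
 * __kmpc_global_thread_num):
 *
 *     // #pragma omp for ordered ...  {  #pragma omp ordered { emit(i); }  }
 *     __kmpc_ordered(&loc, gtid);      // blocks until this iteration's turn
 *     emit(i);
 *     __kmpc_end_ordered(&loc, gtid);  // allows the next iteration to proceed
 */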
897
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000898#if KMP_USE_DYNAMIC_LOCK
899
Jonathan Peytondae13d82015-12-11 21:57:06 +0000900static __forceinline void
901__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000902{
Jonathan Peytondae13d82015-12-11 21:57:06 +0000903 // Pointer to the allocated indirect lock is written to crit, while indexing is ignored.
904 void *idx;
905 kmp_indirect_lock_t **lck;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000906 lck = (kmp_indirect_lock_t **)crit;
Jonathan Peytondae13d82015-12-11 21:57:06 +0000907 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
908 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
909 KMP_SET_I_LOCK_LOCATION(ilk, loc);
910 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
911 KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000912#if USE_ITT_BUILD
Jonathan Peytondae13d82015-12-11 21:57:06 +0000913 __kmp_itt_critical_creating(ilk->lock, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000914#endif
Jonathan Peytondae13d82015-12-11 21:57:06 +0000915 int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
916 if (status == 0) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000917#if USE_ITT_BUILD
Jonathan Peytondae13d82015-12-11 21:57:06 +0000918 __kmp_itt_critical_destroyed(ilk->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000919#endif
Jonathan Peytondae13d82015-12-11 21:57:06 +0000920 // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit.
921 //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000922 }
Jonathan Peytondae13d82015-12-11 21:57:06 +0000923 KMP_DEBUG_ASSERT(*lck != NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000924}
925
926// Fast-path acquire tas lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000927#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000928 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000929 if (l->lk.poll != KMP_LOCK_FREE(tas) || \
930 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000931 kmp_uint32 spins; \
932 KMP_FSYNC_PREPARE(l); \
933 KMP_INIT_YIELD(spins); \
934 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
935 KMP_YIELD(TRUE); \
936 } else { \
937 KMP_YIELD_SPIN(spins); \
938 } \
Jonathan Peyton377aa402016-04-14 16:00:37 +0000939 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000940 while (l->lk.poll != KMP_LOCK_FREE(tas) || \
941 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
Jonathan Peyton377aa402016-04-14 16:00:37 +0000942 __kmp_spin_backoff(&backoff); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000943 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
944 KMP_YIELD(TRUE); \
945 } else { \
946 KMP_YIELD_SPIN(spins); \
947 } \
948 } \
949 } \
950 KMP_FSYNC_ACQUIRED(l); \
951}
952
953// Fast-path test tas lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000954#define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000955 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000956 rc = l->lk.poll == KMP_LOCK_FREE(tas) && \
957 KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000958}
959
960// Fast-path release tas lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000961#define KMP_RELEASE_TAS_LOCK(lock, gtid) { \
962 TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000963 KMP_MB(); \
964}
965
Jonathan Peytondae13d82015-12-11 21:57:06 +0000966#if KMP_USE_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000967
968# include <unistd.h>
969# include <sys/syscall.h>
970# ifndef FUTEX_WAIT
971# define FUTEX_WAIT 0
972# endif
973# ifndef FUTEX_WAKE
974# define FUTEX_WAKE 1
975# endif
976
977// Fast-path acquire futex lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000978#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000979 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
980 kmp_int32 gtid_code = (gtid+1) << 1; \
981 KMP_MB(); \
982 KMP_FSYNC_PREPARE(ftx); \
983 kmp_int32 poll_val; \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000984 while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
985 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
986 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000987 if (!cond) { \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000988 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000989 continue; \
990 } \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000991 poll_val |= KMP_LOCK_BUSY(1, futex); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000992 } \
993 kmp_int32 rc; \
994 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
995 continue; \
996 } \
997 gtid_code |= 1; \
998 } \
999 KMP_FSYNC_ACQUIRED(ftx); \
1000}
1001
1002// Fast-path test futex lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001003#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001004 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
Jonathan Peytoneeec4c82016-06-22 16:36:07 +00001005 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1 << 1, futex))) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001006 KMP_FSYNC_ACQUIRED(ftx); \
1007 rc = TRUE; \
1008 } else { \
1009 rc = FALSE; \
1010 } \
1011}
1012
1013// Fast-path release futex lock
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001014#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001015 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1016 KMP_MB(); \
1017 KMP_FSYNC_RELEASING(ftx); \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001018 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1019 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1020 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001021 } \
1022 KMP_MB(); \
1023 KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1024}
1025
Jonathan Peytondae13d82015-12-11 21:57:06 +00001026#endif // KMP_USE_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001027
1028#else // KMP_USE_DYNAMIC_LOCK
1029
Jim Cownie5e8470a2013-09-27 10:38:44 +00001030static kmp_user_lock_p
1031__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
1032{
1033 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1034
1035 //
1036 // Because of the double-check, the following load
1037 // doesn't need to be volatile.
1038 //
1039 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1040
1041 if ( lck == NULL ) {
1042 void * idx;
1043
1044 // Allocate & initialize the lock.
1045 // Remember allocated locks in table in order to free them in __kmp_cleanup()
1046 lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
1047 __kmp_init_user_lock_with_checks( lck );
1048 __kmp_set_user_lock_location( lck, loc );
1049#if USE_ITT_BUILD
1050 __kmp_itt_critical_creating( lck );
1051 // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
1052 // lock. It is the only place where we can guarantee it. There is a chance the lock will be
1053 // destroyed without ever being used, but that is not a problem, because this is not a real event seen
1054 // by the user but rather a way of setting a name for the object (lock). See more details in kmp_itt.h.
1055#endif /* USE_ITT_BUILD */
1056
1057 //
1058 // Use a cmpxchg instruction to slam the start of the critical
1059 // section with the lock pointer. If another thread beat us
1060 // to it, deallocate the lock, and use the lock that the other
1061 // thread allocated.
1062 //
1063 int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
1064
1065 if ( status == 0 ) {
1066 // Deallocate the lock and reload the value.
1067#if USE_ITT_BUILD
1068 __kmp_itt_critical_destroyed( lck );
1069 // Let ITT know the lock is destroyed and the same memory location may be reused for
1070 // another purpose.
1071#endif /* USE_ITT_BUILD */
1072 __kmp_destroy_user_lock_with_checks( lck );
1073 __kmp_user_lock_free( &idx, gtid, lck );
1074 lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1075 KMP_DEBUG_ASSERT( lck != NULL );
1076 }
1077 }
1078 return lck;
1079}
1080
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001081#endif // KMP_USE_DYNAMIC_LOCK
1082
Jim Cownie5e8470a2013-09-27 10:38:44 +00001083/*!
1084@ingroup WORK_SHARING
1085@param loc source location information.
1086@param global_tid global thread number.
1087@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
1088some other suitably unique value.
1089
1090Enter code protected by a `critical` construct.
1091This function blocks until the executing thread can enter the critical section.
1092*/
1093void
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001094__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
1095{
1096#if KMP_USE_DYNAMIC_LOCK
1097 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1098#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001099 KMP_COUNT_BLOCK(OMP_CRITICAL);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001100 KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001101 kmp_user_lock_p lck;
1102
1103 KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1104
1105 //TODO: add THR_OVHD_STATE
1106
1107 KMP_CHECK_USER_LOCK_INIT();
1108
1109 if ( ( __kmp_user_lock_kind == lk_tas )
1110 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1111 lck = (kmp_user_lock_p)crit;
1112 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001113#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001114 else if ( ( __kmp_user_lock_kind == lk_futex )
1115 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1116 lck = (kmp_user_lock_p)crit;
1117 }
1118#endif
1119 else { // ticket, queuing or drdpa
1120 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1121 }
1122
1123 if ( __kmp_env_consistency_check )
1124 __kmp_push_sync( global_tid, ct_critical, loc, lck );
1125
1126 /* since the critical directive binds to all threads, not just
1127 * the current team we have to check this even if we are in a
1128 * serialized team */
1129 /* also, even if we are the uber thread, we still have to conduct the lock,
1130 * as we have to contend with sibling threads */
1131
1132#if USE_ITT_BUILD
1133 __kmp_itt_critical_acquiring( lck );
1134#endif /* USE_ITT_BUILD */
1135 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001136 __kmp_acquire_user_lock_with_checks( lck, global_tid );
1137
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001138#if USE_ITT_BUILD
1139 __kmp_itt_critical_acquired( lck );
1140#endif /* USE_ITT_BUILD */
1141
Jonathan Peyton93a879c2016-03-21 18:32:26 +00001142 KMP_START_EXPLICIT_TIMER(OMP_critical);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001143 KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001144#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001145}
1146
1147#if KMP_USE_DYNAMIC_LOCK
1148
1149// Converts the given hint to an internal lock implementation
1150static __forceinline kmp_dyna_lockseq_t
1151__kmp_map_hint_to_lock(uintptr_t hint)
1152{
1153#if KMP_USE_TSX
1154# define KMP_TSX_LOCK(seq) lockseq_##seq
1155#else
1156# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1157#endif
Hal Finkel01bb2402016-03-27 13:24:09 +00001158
1159#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1160# define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1161#else
1162# define KMP_CPUINFO_RTM 0
1163#endif
1164
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001165 // Hints that do not require further logic
1166 if (hint & kmp_lock_hint_hle)
1167 return KMP_TSX_LOCK(hle);
1168 if (hint & kmp_lock_hint_rtm)
Hal Finkel01bb2402016-03-27 13:24:09 +00001169 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001170 if (hint & kmp_lock_hint_adaptive)
Hal Finkel01bb2402016-03-27 13:24:09 +00001171 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001172
1173 // Rule out conflicting hints first by returning the default lock
1174 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1175 return __kmp_user_lock_seq;
1176 if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
1177 return __kmp_user_lock_seq;
1178
1179 // Do not even consider speculation when it appears to be contended
1180 if (hint & omp_lock_hint_contended)
1181 return lockseq_queuing;
1182
1183 // Uncontended lock without speculation
1184 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1185 return lockseq_tas;
1186
1187 // HLE lock for speculation
1188 if (hint & omp_lock_hint_speculative)
1189 return KMP_TSX_LOCK(hle);
1190
1191 return __kmp_user_lock_seq;
1192}
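/*
 * A few concrete instances of the mapping above, assuming TSX-capable
 * hardware and the default __kmp_user_lock_seq (these follow directly from
 * the code and are not an exhaustive table):
 *
 *     omp_lock_hint_contended                              -> lockseq_queuing
 *     omp_lock_hint_uncontended                            -> lockseq_tas
 *     omp_lock_hint_speculative                            -> HLE lock
 *     omp_lock_hint_contended | omp_lock_hint_uncontended  -> default (conflicting hints)
 *     kmp_lock_hint_rtm on hardware without RTM            -> default
 */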
1193
1194/*!
1195@ingroup WORK_SHARING
1196@param loc source location information.
1197@param global_tid global thread number.
1198@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section,
1199or some other suitably unique value.
1200@param hint the lock hint.
1201
1202Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation.
1203This function blocks until the executing thread can enter the critical section unless the hint suggests use of
1204speculative execution and the hardware supports it.
1205*/
1206void
1207__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
1208{
1209 KMP_COUNT_BLOCK(OMP_CRITICAL);
1210 kmp_user_lock_p lck;
1211
1212 KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1213
1214 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1215 // Check if it is initialized.
1216 if (*lk == 0) {
1217 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1218 if (KMP_IS_D_LOCK(lckseq)) {
1219 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
1220 } else {
1221 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1222 }
1223 }
1224 // Branch for accessing the actual lock object and set operation. This branching is inevitable since
1225 // this lock initialization does not follow the normal dispatch path (lock table is not used).
1226 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1227 lck = (kmp_user_lock_p)lk;
1228 if (__kmp_env_consistency_check) {
1229 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
1230 }
1231# if USE_ITT_BUILD
1232 __kmp_itt_critical_acquiring(lck);
1233# endif
1234# if KMP_USE_INLINED_TAS
1235 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1236 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1237 } else
1238# elif KMP_USE_INLINED_FUTEX
1239 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1240 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1241 } else
1242# endif
1243 {
1244 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1245 }
1246 } else {
1247 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1248 lck = ilk->lock;
1249 if (__kmp_env_consistency_check) {
1250 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
1251 }
1252# if USE_ITT_BUILD
1253 __kmp_itt_critical_acquiring(lck);
1254# endif
1255 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1256 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001257
Jim Cownie5e8470a2013-09-27 10:38:44 +00001258#if USE_ITT_BUILD
1259 __kmp_itt_critical_acquired( lck );
1260#endif /* USE_ITT_BUILD */
1261
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001262 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001263 KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001264} // __kmpc_critical_with_hint
1265
1266#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001267
1268/*!
1269@ingroup WORK_SHARING
1270@param loc source location information.
1271@param global_tid global thread number.
1272@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
1273some other suitably unique value.
1274
1275Leave a critical section, releasing any lock that was held during its execution.
1276*/
1277void
1278__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
1279{
1280 kmp_user_lock_p lck;
1281
1282 KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
1283
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001284#if KMP_USE_DYNAMIC_LOCK
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001285 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001286 lck = (kmp_user_lock_p)crit;
1287 KMP_ASSERT(lck != NULL);
1288 if (__kmp_env_consistency_check) {
1289 __kmp_pop_sync(global_tid, ct_critical, loc);
1290 }
1291# if USE_ITT_BUILD
1292 __kmp_itt_critical_releasing( lck );
1293# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00001294# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001295 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001296 KMP_RELEASE_TAS_LOCK(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001297 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00001298# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001299 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001300 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001301 } else
1302# endif
1303 {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001304 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001305 }
1306 } else {
1307 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1308 KMP_ASSERT(ilk != NULL);
1309 lck = ilk->lock;
1310 if (__kmp_env_consistency_check) {
1311 __kmp_pop_sync(global_tid, ct_critical, loc);
1312 }
1313# if USE_ITT_BUILD
1314 __kmp_itt_critical_releasing( lck );
1315# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00001316 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001317 }
1318
1319#else // KMP_USE_DYNAMIC_LOCK
1320
Jim Cownie5e8470a2013-09-27 10:38:44 +00001321 if ( ( __kmp_user_lock_kind == lk_tas )
1322 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1323 lck = (kmp_user_lock_p)crit;
1324 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001325#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001326 else if ( ( __kmp_user_lock_kind == lk_futex )
1327 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1328 lck = (kmp_user_lock_p)crit;
1329 }
1330#endif
1331 else { // ticket, queuing or drdpa
1332 lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
1333 }
1334
1335 KMP_ASSERT(lck != NULL);
1336
1337 if ( __kmp_env_consistency_check )
1338 __kmp_pop_sync( global_tid, ct_critical, loc );
1339
1340#if USE_ITT_BUILD
1341 __kmp_itt_critical_releasing( lck );
1342#endif /* USE_ITT_BUILD */
1343 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001344 __kmp_release_user_lock_with_checks( lck, global_tid );
1345
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001346#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001347 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001348 ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
1349 ompt_callbacks.ompt_callback(ompt_event_release_critical)(
1350 (uint64_t) lck);
1351 }
1352#endif
1353
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001354#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001355 KMP_POP_PARTITIONED_TIMER();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001356 KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
1357}
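/*
 * Hypothetical lowering sketch for a named critical section using the
 * enter/exit pair (the variable name is illustrative; one zero-initialized
 * kmp_critical_name is shared by every use of the same critical name):
 *
 *     static kmp_critical_name crit_name = { 0 };
 *     ...
 *     __kmpc_critical(&loc, gtid, &crit_name);
 *     update_shared_state();
 *     __kmpc_end_critical(&loc, gtid, &crit_name);
 *
 * When a hint() clause is present (OpenMP 4.5), the enter call becomes
 * __kmpc_critical_with_hint(&loc, gtid, &crit_name, hint); the exit call is
 * unchanged.
 */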
1358
1359/*!
1360@ingroup SYNCHRONIZATION
1361@param loc source location information
1362@param global_tid thread id.
1363@return one if the thread should execute the master block, zero otherwise
1364
1365Start execution of a combined barrier and master. The barrier is executed inside this function.
1366*/
1367kmp_int32
1368__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1369{
1370 int status;
1371
1372 KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1373
1374 if (! TCR_4(__kmp_init_parallel))
1375 __kmp_parallel_initialize();
1376
1377 if ( __kmp_env_consistency_check )
1378 __kmp_check_barrier( global_tid, ct_barrier, loc );
1379
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001380#if USE_ITT_NOTIFY
1381 __kmp_threads[global_tid]->th.th_ident = loc;
1382#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001383 status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1384
1385 return (status != 0) ? 0 : 1;
1386}
1387
1388/*!
1389@ingroup SYNCHRONIZATION
1390@param loc source location information
1391@param global_tid thread id.
1392
1393Complete the execution of a combined barrier and master. This function should
1394only be called at the completion of the <tt>master</tt> code. Other threads will
1395still be waiting at the barrier and this call releases them.
1396*/
1397void
1398__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1399{
1400 KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1401
1402 __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1403}
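
/*
 * Illustrative sketch (assumed lowering, not from a specific compiler): only the
 * thread for which __kmpc_barrier_master() returns 1 runs the master block and the
 * matching end call, which releases the threads still waiting in the barrier:
 *
 *   if (__kmpc_barrier_master(loc, gtid)) {
 *       ...                                   // master-only code
 *       __kmpc_end_barrier_master(loc, gtid);
 *   }
 */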
1404
1405/*!
1406@ingroup SYNCHRONIZATION
1407@param loc source location information
1408@param global_tid thread id.
1409@return one if the thread should execute the master block, zero otherwise
1410
1411Start execution of a combined barrier and master(nowait) construct.
1412The barrier is executed inside this function.
There is no equivalent "end" function, since the construct is a master with nowait
semantics: the bookkeeping that an "end" call would otherwise perform is done inside
this function, and the other threads do not wait for the master block to finish.
*/
1415kmp_int32
1416__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1417{
1418 kmp_int32 ret;
1419
1420 KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1421
1422 if (! TCR_4(__kmp_init_parallel))
1423 __kmp_parallel_initialize();
1424
1425 if ( __kmp_env_consistency_check ) {
1426 if ( loc == 0 ) {
1427 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1428 }
1429 __kmp_check_barrier( global_tid, ct_barrier, loc );
1430 }
1431
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001432#if USE_ITT_NOTIFY
1433 __kmp_threads[global_tid]->th.th_ident = loc;
1434#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001435 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1436
1437 ret = __kmpc_master (loc, global_tid);
1438
1439 if ( __kmp_env_consistency_check ) {
        /* there is no __kmpc_end_master call, so the (stats and consistency) */
        /* actions of __kmpc_end_master are done here                         */
1442
1443 if ( global_tid < 0 ) {
1444 KMP_WARNING( ThreadIdentInvalid );
1445 }
1446 if (ret) {
1447 /* only one thread should do the pop since only */
1448 /* one did the push (see __kmpc_master()) */
1449
1450 __kmp_pop_sync( global_tid, ct_master, loc );
1451 }
1452 }
1453
1454 return (ret);
1455}
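
/*
 * Illustrative sketch (assumed lowering): the nowait variant needs no end call;
 * threads that do not execute the master block simply continue past the construct:
 *
 *   if (__kmpc_barrier_master_nowait(loc, gtid)) {
 *       ...                                   // master-only code, no end call
 *   }
 */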
1456
1457/* The BARRIER for a SINGLE process section is always explicit */
1458/*!
1459@ingroup WORK_SHARING
1460@param loc source location information
1461@param global_tid global thread number
1462@return One if this thread should execute the single construct, zero otherwise.
1463
1464Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler should
introduce an explicit barrier if one is required.
1467*/
1468
1469kmp_int32
1470__kmpc_single(ident_t *loc, kmp_int32 global_tid)
1471{
1472 kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
Jonathan Peyton30138252016-03-03 21:21:05 +00001473
1474 if (rc) {
1475 // We are going to execute the single statement, so we should count it.
1476 KMP_COUNT_BLOCK(OMP_SINGLE);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001477 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001478 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001479
1480#if OMPT_SUPPORT && OMPT_TRACE
1481 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1482 kmp_team_t *team = this_thr -> th.th_team;
1483 int tid = __kmp_tid_from_gtid( global_tid );
1484
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001485 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001486 if (rc) {
1487 if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
1488 ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
1489 team->t.ompt_team_info.parallel_id,
1490 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
1491 team->t.ompt_team_info.microtask);
1492 }
1493 } else {
1494 if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
1495 ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
1496 team->t.ompt_team_info.parallel_id,
1497 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1498 }
1499 this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
1500 }
1501 }
1502#endif
1503
Jim Cownie5e8470a2013-09-27 10:38:44 +00001504 return rc;
1505}
1506
1507/*!
1508@ingroup WORK_SHARING
1509@param loc source location information
1510@param global_tid global thread number
1511
1512Mark the end of a <tt>single</tt> construct. This function should
1513only be called by the thread that executed the block of code protected
1514by the `single` construct.
1515*/
1516void
1517__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1518{
1519 __kmp_exit_single( global_tid );
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001520 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001521
1522#if OMPT_SUPPORT && OMPT_TRACE
1523 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1524 kmp_team_t *team = this_thr -> th.th_team;
1525 int tid = __kmp_tid_from_gtid( global_tid );
1526
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001527 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001528 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
1529 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
1530 team->t.ompt_team_info.parallel_id,
1531 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1532 }
1533#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001534}
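
/*
 * Illustrative sketch (assumed lowering): a single construct without a nowait
 * clause is expected to be paired with an explicit barrier emitted by the compiler:
 *
 *   if (__kmpc_single(loc, gtid)) {
 *       ...                                   // body of the single region
 *       __kmpc_end_single(loc, gtid);
 *   }
 *   __kmpc_barrier(loc, gtid);                // omitted when nowait is specified
 */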
1535
1536/*!
1537@ingroup WORK_SHARING
1538@param loc Source location
1539@param global_tid Global thread id
1540
1541Mark the end of a statically scheduled loop.
1542*/
1543void
1544__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1545{
1546 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1547
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001548#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001549 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001550 ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
Jonas Hahnfelde46a4942016-03-24 12:52:20 +00001551 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1552 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001553 ompt_callbacks.ompt_callback(ompt_event_loop_end)(
Jonas Hahnfelde46a4942016-03-24 12:52:20 +00001554 team_info->parallel_id, task_info->task_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001555 }
1556#endif
1557
Jim Cownie5e8470a2013-09-27 10:38:44 +00001558 if ( __kmp_env_consistency_check )
1559 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1560}
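
/*
 * Illustrative sketch (schematic only; bounds/stride handling simplified) of the
 * init/fini pairing for a statically scheduled loop over 0..N-1:
 *
 *   kmp_int32 lower = 0, upper = N - 1, stride = 1, last = 0;
 *   __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last,
 *                            &lower, &upper, &stride, 1, 0);
 *   for (kmp_int32 i = lower; i <= upper; ++i)
 *       ...                                   // loop body
 *   __kmpc_for_static_fini(loc, gtid);
 */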
1561
1562/*
1563 * User routines which take C-style arguments (call by value)
1564 * different from the Fortran equivalent routines
1565 */
1566
1567void
1568ompc_set_num_threads( int arg )
1569{
1570// !!!!! TODO: check the per-task binding
1571 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1572}
1573
1574void
1575ompc_set_dynamic( int flag )
1576{
1577 kmp_info_t *thread;
1578
1579 /* For the thread-private implementation of the internal controls */
1580 thread = __kmp_entry_thread();
1581
1582 __kmp_save_internal_controls( thread );
1583
1584 set__dynamic( thread, flag ? TRUE : FALSE );
1585}
1586
1587void
1588ompc_set_nested( int flag )
1589{
1590 kmp_info_t *thread;
1591
1592 /* For the thread-private internal controls implementation */
1593 thread = __kmp_entry_thread();
1594
1595 __kmp_save_internal_controls( thread );
1596
1597 set__nested( thread, flag ? TRUE : FALSE );
1598}
1599
Jim Cownie5e8470a2013-09-27 10:38:44 +00001600void
1601ompc_set_max_active_levels( int max_active_levels )
1602{
1603 /* TO DO */
1604 /* we want per-task implementation of this internal control */
1605
1606 /* For the per-thread internal controls implementation */
1607 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1608}
1609
1610void
1611ompc_set_schedule( omp_sched_t kind, int modifier )
1612{
1613// !!!!! TODO: check the per-task binding
1614 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1615}
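
/*
 * Hedged usage note: the kind/modifier pair follows the omp_set_schedule() semantics,
 * e.g. (illustrative call only)
 *
 *   ompc_set_schedule(omp_sched_dynamic, 4);  // dynamic schedule, chunk size 4,
 *                                             // for subsequent schedule(runtime) loops
 */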
1616
1617int
1618ompc_get_ancestor_thread_num( int level )
1619{
1620 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1621}
1622
1623int
1624ompc_get_team_size( int level )
1625{
1626 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1627}
1628
Jim Cownie5e8470a2013-09-27 10:38:44 +00001629void
1630kmpc_set_stacksize( int arg )
1631{
1632 // __kmp_aux_set_stacksize initializes the library if needed
1633 __kmp_aux_set_stacksize( arg );
1634}
1635
1636void
1637kmpc_set_stacksize_s( size_t arg )
1638{
1639 // __kmp_aux_set_stacksize initializes the library if needed
1640 __kmp_aux_set_stacksize( arg );
1641}
1642
1643void
1644kmpc_set_blocktime( int arg )
1645{
1646 int gtid, tid;
1647 kmp_info_t *thread;
1648
1649 gtid = __kmp_entry_gtid();
1650 tid = __kmp_tid_from_gtid(gtid);
1651 thread = __kmp_thread_from_gtid(gtid);
1652
1653 __kmp_aux_set_blocktime( arg, thread, tid );
1654}
1655
1656void
1657kmpc_set_library( int arg )
1658{
1659 // __kmp_user_set_library initializes the library if needed
1660 __kmp_user_set_library( (enum library_type)arg );
1661}
1662
1663void
1664kmpc_set_defaults( char const * str )
1665{
1666 // __kmp_aux_set_defaults initializes the library if needed
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001667 __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001668}
1669
Jonathan Peyton067325f2016-05-31 19:01:15 +00001670void
1671kmpc_set_disp_num_buffers( int arg )
1672{
1673 // ignore after initialization because some teams have already
1674 // allocated dispatch buffers
1675 if( __kmp_init_serial == 0 && arg > 0 )
1676 __kmp_dispatch_num_buffers = arg;
1677}
1678
Jim Cownie5e8470a2013-09-27 10:38:44 +00001679int
1680kmpc_set_affinity_mask_proc( int proc, void **mask )
1681{
Alp Toker98758b02014-03-02 04:12:06 +00001682#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001683 return -1;
1684#else
1685 if ( ! TCR_4(__kmp_init_middle) ) {
1686 __kmp_middle_initialize();
1687 }
1688 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1689#endif
1690}
1691
1692int
1693kmpc_unset_affinity_mask_proc( int proc, void **mask )
1694{
Alp Toker98758b02014-03-02 04:12:06 +00001695#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001696 return -1;
1697#else
1698 if ( ! TCR_4(__kmp_init_middle) ) {
1699 __kmp_middle_initialize();
1700 }
1701 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1702#endif
1703}
1704
1705int
1706kmpc_get_affinity_mask_proc( int proc, void **mask )
1707{
Alp Toker98758b02014-03-02 04:12:06 +00001708#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709 return -1;
1710#else
1711 if ( ! TCR_4(__kmp_init_middle) ) {
1712 __kmp_middle_initialize();
1713 }
1714 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1715#endif
1716}
1717
Jim Cownie5e8470a2013-09-27 10:38:44 +00001718
1719/* -------------------------------------------------------------------------- */
1720/*!
1721@ingroup THREADPRIVATE
1722@param loc source location information
1723@param gtid global thread number
1724@param cpy_size size of the cpy_data buffer
1725@param cpy_data pointer to data to be copied
1726@param cpy_func helper function to call for copying data
1727@param didit flag variable: 1=single thread; 0=not single thread
1728
1729__kmpc_copyprivate implements the interface for the private data broadcast needed for
1730the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1731All threads participating in the parallel region call this routine.
1732One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1733and all other threads should have that variable set to 0.
1734All threads pass a pointer to a data buffer (cpy_data) that they have built.
1735
1736The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1737clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1738race conditions, so the code generation for the single region should avoid generating a barrier
1739after the call to @ref __kmpc_copyprivate.
1740
1741The <tt>gtid</tt> parameter is the global thread id for the current thread.
1742The <tt>loc</tt> parameter is a pointer to source location information.
1743
1744Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1745to a team-private location, then the other threads will each call the function pointed to by
1746the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer.
1747
1748The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1749and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1750the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1751to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1752
1753The interface to cpy_func is as follows:
1754@code
1755void cpy_func( void *destination, void *source )
1756@endcode
1757where void *destination is the cpy_data pointer for the thread being copied to
1758and void *source is the cpy_data pointer for the thread being copied from.
1759*/
1760void
1761__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1762{
1763 void **data_ptr;
1764
1765 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1766
1767 KMP_MB();
1768
1769 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1770
1771 if ( __kmp_env_consistency_check ) {
1772 if ( loc == 0 ) {
1773 KMP_WARNING( ConstructIdentInvalid );
1774 }
1775 }
1776
1777 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1778
1779 if (didit) *data_ptr = cpy_data;
1780
1781 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001782#if USE_ITT_NOTIFY
1783 __kmp_threads[gtid]->th.th_ident = loc;
1784#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001785 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1786
1787 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1788
1789 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1790 /* Nesting checks are already handled by the single construct checks */
1791
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001792#if USE_ITT_NOTIFY
1793 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1794#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001795 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1796}
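
/*
 * Illustrative sketch (assumed lowering; the helper name copy_x is hypothetical)
 * for "#pragma omp single copyprivate(x)" with an int x:
 *
 *   static void copy_x(void *dst, void *src) {
 *       *(int *)dst = *(int *)src;             // copy from the single thread's x
 *   }
 *   ...
 *   kmp_int32 didit = __kmpc_single(loc, gtid);
 *   if (didit) {
 *       x = ...;                               // body of the single region
 *       __kmpc_end_single(loc, gtid);
 *   }
 *   __kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_x, didit);
 *   // no trailing barrier is generated: __kmpc_copyprivate synchronizes internally
 */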
1797
1798/* -------------------------------------------------------------------------- */
1799
1800#define INIT_LOCK __kmp_init_user_lock_with_checks
1801#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1802#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1803#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1804#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1805#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1806#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1807#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1808#define TEST_LOCK __kmp_test_user_lock_with_checks
1809#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1810#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1811#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1812
1813
1814/*
1815 * TODO: Make check abort messages use location info & pass it
1816 * into with_checks routines
1817 */
1818
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001819#if KMP_USE_DYNAMIC_LOCK
1820
1821// internal lock initializer
1822static __forceinline void
1823__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1824{
1825 if (KMP_IS_D_LOCK(seq)) {
1826 KMP_INIT_D_LOCK(lock, seq);
1827#if USE_ITT_BUILD
1828 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
1829#endif
1830 } else {
1831 KMP_INIT_I_LOCK(lock, seq);
1832#if USE_ITT_BUILD
1833 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1834 __kmp_itt_lock_creating(ilk->lock, loc);
1835#endif
1836 }
1837}
1838
1839// internal nest lock initializer
1840static __forceinline void
1841__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1842{
1843#if KMP_USE_TSX
1844 // Don't have nested lock implementation for speculative locks
1845 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
1846 seq = __kmp_user_lock_seq;
1847#endif
1848 switch (seq) {
1849 case lockseq_tas:
1850 seq = lockseq_nested_tas;
1851 break;
1852#if KMP_USE_FUTEX
1853 case lockseq_futex:
1854 seq = lockseq_nested_futex;
1855 break;
1856#endif
1857 case lockseq_ticket:
1858 seq = lockseq_nested_ticket;
1859 break;
1860 case lockseq_queuing:
1861 seq = lockseq_nested_queuing;
1862 break;
1863 case lockseq_drdpa:
1864 seq = lockseq_nested_drdpa;
1865 break;
1866 default:
1867 seq = lockseq_nested_queuing;
1868 }
1869 KMP_INIT_I_LOCK(lock, seq);
1870#if USE_ITT_BUILD
1871 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1872 __kmp_itt_lock_creating(ilk->lock, loc);
1873#endif
1874}
1875
1876/* initialize the lock with a hint */
1877void
1878__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1879{
1880 KMP_DEBUG_ASSERT(__kmp_init_serial);
1881 if (__kmp_env_consistency_check && user_lock == NULL) {
1882 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
1883 }
1884
1885 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1886}
1887
/* initialize the nested lock with a hint */
1889void
1890__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1891{
1892 KMP_DEBUG_ASSERT(__kmp_init_serial);
1893 if (__kmp_env_consistency_check && user_lock == NULL) {
1894 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
1895 }
1896
1897 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1898}
1899
1900#endif // KMP_USE_DYNAMIC_LOCK
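
/*
 * For reference, the two *_with_hint entry points above (available when
 * KMP_USE_DYNAMIC_LOCK is enabled) support the OpenMP 4.5 lock-with-hint API.
 * A hedged user-level example (user code, not runtime code):
 *
 *   omp_lock_t l;
 *   omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
 *   // the hint is mapped onto one of the dynamic lock sequences
 */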
1901
Jim Cownie5e8470a2013-09-27 10:38:44 +00001902/* initialize the lock */
1903void
1904__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001905#if KMP_USE_DYNAMIC_LOCK
1906 KMP_DEBUG_ASSERT(__kmp_init_serial);
1907 if (__kmp_env_consistency_check && user_lock == NULL) {
1908 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1909 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001910 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001911
1912#else // KMP_USE_DYNAMIC_LOCK
1913
Jim Cownie5e8470a2013-09-27 10:38:44 +00001914 static char const * const func = "omp_init_lock";
1915 kmp_user_lock_p lck;
1916 KMP_DEBUG_ASSERT( __kmp_init_serial );
1917
1918 if ( __kmp_env_consistency_check ) {
1919 if ( user_lock == NULL ) {
1920 KMP_FATAL( LockIsUninitialized, func );
1921 }
1922 }
1923
1924 KMP_CHECK_USER_LOCK_INIT();
1925
1926 if ( ( __kmp_user_lock_kind == lk_tas )
1927 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1928 lck = (kmp_user_lock_p)user_lock;
1929 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001930#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001931 else if ( ( __kmp_user_lock_kind == lk_futex )
1932 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1933 lck = (kmp_user_lock_p)user_lock;
1934 }
1935#endif
1936 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001937 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 }
1939 INIT_LOCK( lck );
1940 __kmp_set_user_lock_location( lck, loc );
1941
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001942#if OMPT_SUPPORT && OMPT_TRACE
1943 if (ompt_enabled &&
1944 ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
1945 ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
1946 }
1947#endif
1948
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949#if USE_ITT_BUILD
1950 __kmp_itt_lock_creating( lck );
1951#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001952
1953#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001954} // __kmpc_init_lock
1955
/* initialize the nested lock */
1957void
1958__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001959#if KMP_USE_DYNAMIC_LOCK
1960
1961 KMP_DEBUG_ASSERT(__kmp_init_serial);
1962 if (__kmp_env_consistency_check && user_lock == NULL) {
1963 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1964 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001965 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001966
1967#else // KMP_USE_DYNAMIC_LOCK
1968
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969 static char const * const func = "omp_init_nest_lock";
1970 kmp_user_lock_p lck;
1971 KMP_DEBUG_ASSERT( __kmp_init_serial );
1972
1973 if ( __kmp_env_consistency_check ) {
1974 if ( user_lock == NULL ) {
1975 KMP_FATAL( LockIsUninitialized, func );
1976 }
1977 }
1978
1979 KMP_CHECK_USER_LOCK_INIT();
1980
1981 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1982 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1983 lck = (kmp_user_lock_p)user_lock;
1984 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001985#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00001986 else if ( ( __kmp_user_lock_kind == lk_futex )
1987 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1988 <= OMP_NEST_LOCK_T_SIZE ) ) {
1989 lck = (kmp_user_lock_p)user_lock;
1990 }
1991#endif
1992 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001993 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994 }
1995
1996 INIT_NESTED_LOCK( lck );
1997 __kmp_set_user_lock_location( lck, loc );
1998
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001999#if OMPT_SUPPORT && OMPT_TRACE
2000 if (ompt_enabled &&
2001 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
2002 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
2003 }
2004#endif
2005
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006#if USE_ITT_BUILD
2007 __kmp_itt_lock_creating( lck );
2008#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002009
2010#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011} // __kmpc_init_nest_lock
2012
2013void
2014__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002015#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002016
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002017# if USE_ITT_BUILD
2018 kmp_user_lock_p lck;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002019 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2020 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002021 } else {
2022 lck = (kmp_user_lock_p)user_lock;
2023 }
2024 __kmp_itt_lock_destroyed(lck);
2025# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002026 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002027#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002028 kmp_user_lock_p lck;
2029
2030 if ( ( __kmp_user_lock_kind == lk_tas )
2031 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2032 lck = (kmp_user_lock_p)user_lock;
2033 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002034#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002035 else if ( ( __kmp_user_lock_kind == lk_futex )
2036 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2037 lck = (kmp_user_lock_p)user_lock;
2038 }
2039#endif
2040 else {
2041 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
2042 }
2043
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002044#if OMPT_SUPPORT && OMPT_TRACE
2045 if (ompt_enabled &&
2046 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
2047 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
2048 }
2049#endif
2050
Jim Cownie5e8470a2013-09-27 10:38:44 +00002051#if USE_ITT_BUILD
2052 __kmp_itt_lock_destroyed( lck );
2053#endif /* USE_ITT_BUILD */
2054 DESTROY_LOCK( lck );
2055
2056 if ( ( __kmp_user_lock_kind == lk_tas )
2057 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2058 ;
2059 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002060#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002061 else if ( ( __kmp_user_lock_kind == lk_futex )
2062 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2063 ;
2064 }
2065#endif
2066 else {
2067 __kmp_user_lock_free( user_lock, gtid, lck );
2068 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002069#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002070} // __kmpc_destroy_lock
2071
/* destroy the nested lock */
2073void
2074__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002075#if KMP_USE_DYNAMIC_LOCK
2076
2077# if USE_ITT_BUILD
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002078 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002079 __kmp_itt_lock_destroyed(ilk->lock);
2080# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002081 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002082
2083#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002084
2085 kmp_user_lock_p lck;
2086
2087 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2088 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2089 lck = (kmp_user_lock_p)user_lock;
2090 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002091#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002092 else if ( ( __kmp_user_lock_kind == lk_futex )
2093 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2094 <= OMP_NEST_LOCK_T_SIZE ) ) {
2095 lck = (kmp_user_lock_p)user_lock;
2096 }
2097#endif
2098 else {
2099 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
2100 }
2101
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002102#if OMPT_SUPPORT && OMPT_TRACE
2103 if (ompt_enabled &&
2104 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
2105 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
2106 }
2107#endif
2108
Jim Cownie5e8470a2013-09-27 10:38:44 +00002109#if USE_ITT_BUILD
2110 __kmp_itt_lock_destroyed( lck );
2111#endif /* USE_ITT_BUILD */
2112
2113 DESTROY_NESTED_LOCK( lck );
2114
2115 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2116 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2117 ;
2118 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002119#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120 else if ( ( __kmp_user_lock_kind == lk_futex )
2121 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2122 <= OMP_NEST_LOCK_T_SIZE ) ) {
2123 ;
2124 }
2125#endif
2126 else {
2127 __kmp_user_lock_free( user_lock, gtid, lck );
2128 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002129#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002130} // __kmpc_destroy_nest_lock
2131
2132void
2133__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002134 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002135#if KMP_USE_DYNAMIC_LOCK
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002136 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002137# if USE_ITT_BUILD
2138 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
2139# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002140# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002141 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002142 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002143 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002144# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002145 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002146 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002147 } else
2148# endif
2149 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002150 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002151 }
2152# if USE_ITT_BUILD
2153 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2154# endif
2155
2156#else // KMP_USE_DYNAMIC_LOCK
2157
Jim Cownie5e8470a2013-09-27 10:38:44 +00002158 kmp_user_lock_p lck;
2159
2160 if ( ( __kmp_user_lock_kind == lk_tas )
2161 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2162 lck = (kmp_user_lock_p)user_lock;
2163 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002164#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002165 else if ( ( __kmp_user_lock_kind == lk_futex )
2166 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2167 lck = (kmp_user_lock_p)user_lock;
2168 }
2169#endif
2170 else {
2171 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
2172 }
2173
2174#if USE_ITT_BUILD
2175 __kmp_itt_lock_acquiring( lck );
2176#endif /* USE_ITT_BUILD */
2177
2178 ACQUIRE_LOCK( lck, gtid );
2179
2180#if USE_ITT_BUILD
2181 __kmp_itt_lock_acquired( lck );
2182#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002183
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002184#if OMPT_SUPPORT && OMPT_TRACE
2185 if (ompt_enabled &&
2186 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
2187 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
2188 }
2189#endif
2190
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002191#endif // KMP_USE_DYNAMIC_LOCK
2192}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002193
2194void
2195__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002196#if KMP_USE_DYNAMIC_LOCK
2197
2198# if USE_ITT_BUILD
2199 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2200# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002201 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002202# if USE_ITT_BUILD
2203 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2204#endif
2205
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002206#if OMPT_SUPPORT && OMPT_TRACE
2207 if (ompt_enabled) {
2208 // missing support here: need to know whether acquired first or not
2209 }
2210#endif
2211
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002212#else // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002213 int acquire_status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002214 kmp_user_lock_p lck;
2215
2216 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2217 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2218 lck = (kmp_user_lock_p)user_lock;
2219 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002220#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002221 else if ( ( __kmp_user_lock_kind == lk_futex )
2222 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2223 <= OMP_NEST_LOCK_T_SIZE ) ) {
2224 lck = (kmp_user_lock_p)user_lock;
2225 }
2226#endif
2227 else {
2228 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2229 }
2230
2231#if USE_ITT_BUILD
2232 __kmp_itt_lock_acquiring( lck );
2233#endif /* USE_ITT_BUILD */
2234
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002235 ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002236
2237#if USE_ITT_BUILD
2238 __kmp_itt_lock_acquired( lck );
2239#endif /* USE_ITT_BUILD */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002240
2241#if OMPT_SUPPORT && OMPT_TRACE
2242 if (ompt_enabled) {
2243 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2244 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
2245 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
2246 } else {
2247 if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
2248 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
2249 }
2250 }
2251#endif
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002252
2253#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002254}
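
/*
 * Hedged sketch of nested-lock pairing (illustrative only; 'nl' is a hypothetical
 * nest lock already initialized via __kmpc_init_nest_lock):
 *
 *   __kmpc_set_nest_lock(loc, gtid, &nl);
 *   __kmpc_set_nest_lock(loc, gtid, &nl);     // re-acquisition by the owner just
 *                                             // increments the nesting depth
 *   __kmpc_unset_nest_lock(loc, gtid, &nl);
 *   __kmpc_unset_nest_lock(loc, gtid, &nl);   // lock is released on the last unset
 */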
2255
2256void
2257__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2258{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002259#if KMP_USE_DYNAMIC_LOCK
2260
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002261 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002262# if USE_ITT_BUILD
2263 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2264# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002265# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002266 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002267 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002268 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002269# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002270 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002271 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002272 } else
2273# endif
2274 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002275 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002276 }
2277
2278#else // KMP_USE_DYNAMIC_LOCK
2279
Jim Cownie5e8470a2013-09-27 10:38:44 +00002280 kmp_user_lock_p lck;
2281
2282 /* Can't use serial interval since not block structured */
2283 /* release the lock */
2284
2285 if ( ( __kmp_user_lock_kind == lk_tas )
2286 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002287#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002288 // "fast" path implemented to fix customer performance issue
2289#if USE_ITT_BUILD
2290 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2291#endif /* USE_ITT_BUILD */
2292 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2293 KMP_MB();
2294 return;
2295#else
2296 lck = (kmp_user_lock_p)user_lock;
2297#endif
2298 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002299#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002300 else if ( ( __kmp_user_lock_kind == lk_futex )
2301 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2302 lck = (kmp_user_lock_p)user_lock;
2303 }
2304#endif
2305 else {
2306 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2307 }
2308
2309#if USE_ITT_BUILD
2310 __kmp_itt_lock_releasing( lck );
2311#endif /* USE_ITT_BUILD */
2312
2313 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002314
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002315#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002316 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002317 ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2318 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2319 }
2320#endif
2321
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002322#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002323}
2324
/* release the nested lock */
2326void
2327__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2328{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002329#if KMP_USE_DYNAMIC_LOCK
2330
2331# if USE_ITT_BUILD
2332 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2333# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002334 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002335
2336#else // KMP_USE_DYNAMIC_LOCK
2337
Jim Cownie5e8470a2013-09-27 10:38:44 +00002338 kmp_user_lock_p lck;
2339
2340 /* Can't use serial interval since not block structured */
2341
2342 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2343 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002344#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002345 // "fast" path implemented to fix customer performance issue
2346 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2347#if USE_ITT_BUILD
2348 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2349#endif /* USE_ITT_BUILD */
2350 if ( --(tl->lk.depth_locked) == 0 ) {
2351 TCW_4(tl->lk.poll, 0);
2352 }
2353 KMP_MB();
2354 return;
2355#else
2356 lck = (kmp_user_lock_p)user_lock;
2357#endif
2358 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002359#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002360 else if ( ( __kmp_user_lock_kind == lk_futex )
2361 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2362 <= OMP_NEST_LOCK_T_SIZE ) ) {
2363 lck = (kmp_user_lock_p)user_lock;
2364 }
2365#endif
2366 else {
2367 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2368 }
2369
2370#if USE_ITT_BUILD
2371 __kmp_itt_lock_releasing( lck );
2372#endif /* USE_ITT_BUILD */
2373
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002374 int release_status;
2375 release_status = RELEASE_NESTED_LOCK( lck, gtid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002376#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002377 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002378 if (release_status == KMP_LOCK_RELEASED) {
2379 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2380 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2381 (uint64_t) lck);
2382 }
2383 } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2384 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2385 (uint64_t) lck);
2386 }
2387 }
2388#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002389
2390#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002391}
2392
2393/* try to acquire the lock */
2394int
2395__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2396{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002397 KMP_COUNT_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002398
2399#if KMP_USE_DYNAMIC_LOCK
2400 int rc;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002401 int tag = KMP_EXTRACT_D_TAG(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002402# if USE_ITT_BUILD
Jonathan Peyton81f9cd12015-05-22 22:37:22 +00002403 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002404# endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00002405# if KMP_USE_INLINED_TAS
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002406 if (tag == locktag_tas && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002407 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002408 } else
Jonathan Peytondae13d82015-12-11 21:57:06 +00002409# elif KMP_USE_INLINED_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002410 if (tag == locktag_futex && !__kmp_env_consistency_check) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002411 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002412 } else
2413# endif
2414 {
Jonathan Peytona03533d2015-12-11 21:49:08 +00002415 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002416 }
2417 if (rc) {
2418# if USE_ITT_BUILD
2419 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2420# endif
2421 return FTN_TRUE;
2422 } else {
2423# if USE_ITT_BUILD
2424 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2425# endif
2426 return FTN_FALSE;
2427 }
2428
2429#else // KMP_USE_DYNAMIC_LOCK
2430
Jim Cownie5e8470a2013-09-27 10:38:44 +00002431 kmp_user_lock_p lck;
2432 int rc;
2433
2434 if ( ( __kmp_user_lock_kind == lk_tas )
2435 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2436 lck = (kmp_user_lock_p)user_lock;
2437 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002438#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002439 else if ( ( __kmp_user_lock_kind == lk_futex )
2440 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2441 lck = (kmp_user_lock_p)user_lock;
2442 }
2443#endif
2444 else {
2445 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2446 }
2447
2448#if USE_ITT_BUILD
2449 __kmp_itt_lock_acquiring( lck );
2450#endif /* USE_ITT_BUILD */
2451
2452 rc = TEST_LOCK( lck, gtid );
2453#if USE_ITT_BUILD
2454 if ( rc ) {
2455 __kmp_itt_lock_acquired( lck );
2456 } else {
2457 __kmp_itt_lock_cancelled( lck );
2458 }
2459#endif /* USE_ITT_BUILD */
2460 return ( rc ? FTN_TRUE : FTN_FALSE );
2461
2462 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002463
2464#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002465}
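
/*
 * Hedged lifecycle sketch (illustrative; assumes a compiler that routes the lock
 * API through these entry points and a valid loc/gtid pair):
 *
 *   void *l;                                   // storage for an omp_lock_t
 *   __kmpc_init_lock(loc, gtid, &l);
 *   if (__kmpc_test_lock(loc, gtid, &l)) {     // FTN_TRUE when the lock was taken
 *       ...                                    // protected work
 *       __kmpc_unset_lock(loc, gtid, &l);
 *   }
 *   __kmpc_destroy_lock(loc, gtid, &l);
 */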
2466
/* try to acquire the nested lock */
2468int
2469__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2470{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002471#if KMP_USE_DYNAMIC_LOCK
2472 int rc;
2473# if USE_ITT_BUILD
2474 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2475# endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002476 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002477# if USE_ITT_BUILD
2478 if (rc) {
2479 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2480 } else {
2481 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2482 }
2483# endif
2484 return rc;
2485
2486#else // KMP_USE_DYNAMIC_LOCK
2487
Jim Cownie5e8470a2013-09-27 10:38:44 +00002488 kmp_user_lock_p lck;
2489 int rc;
2490
2491 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2492 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2493 lck = (kmp_user_lock_p)user_lock;
2494 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002495#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00002496 else if ( ( __kmp_user_lock_kind == lk_futex )
2497 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2498 <= OMP_NEST_LOCK_T_SIZE ) ) {
2499 lck = (kmp_user_lock_p)user_lock;
2500 }
2501#endif
2502 else {
2503 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2504 }
2505
2506#if USE_ITT_BUILD
2507 __kmp_itt_lock_acquiring( lck );
2508#endif /* USE_ITT_BUILD */
2509
2510 rc = TEST_NESTED_LOCK( lck, gtid );
2511#if USE_ITT_BUILD
2512 if ( rc ) {
2513 __kmp_itt_lock_acquired( lck );
2514 } else {
2515 __kmp_itt_lock_cancelled( lck );
2516 }
2517#endif /* USE_ITT_BUILD */
2518 return rc;
2519
2520 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002521
2522#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002523}
2524
2525
2526/*--------------------------------------------------------------------------------------------------------------------*/
2527
2528/*
2529 * Interface to fast scalable reduce methods routines
2530 */
2531
2532// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions;
2533// another solution: to re-determine the method one more time in __kmpc_end_reduce* functions (new prototype required then)
2534// AT: which solution is better?
2535#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2536 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2537
2538#define __KMP_GET_REDUCTION_METHOD(gtid) \
2539 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2540
2541// description of the packed_reduction_method variable: look at the macros in kmp.h
2542
2543
2544// used in a critical section reduce block
2545static __forceinline void
2546__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2547
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002548 // this lock was visible to a customer and to the threading profile tool as a serial overhead span
Jim Cownie5e8470a2013-09-27 10:38:44 +00002549 // (although it's used for an internal purpose only)
2550 // why was it visible in previous implementation?
2551 // should we keep it visible in new reduce block?
2552 kmp_user_lock_p lck;
2553
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002554#if KMP_USE_DYNAMIC_LOCK
2555
Jonathan Peytondae13d82015-12-11 21:57:06 +00002556 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
2557 // Check if it is initialized.
2558 if (*lk == 0) {
2559 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
2560 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
2561 } else {
2562 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002563 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002564 }
2565 // Branch for accessing the actual lock object and set operation. This branching is inevitable since
2566 // this lock initialization does not follow the normal dispatch path (lock table is not used).
2567 if (KMP_EXTRACT_D_TAG(lk) != 0) {
2568 lck = (kmp_user_lock_p)lk;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002569 KMP_DEBUG_ASSERT(lck != NULL);
2570 if (__kmp_env_consistency_check) {
2571 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2572 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002573 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002574 } else {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002575 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
2576 lck = ilk->lock;
2577 KMP_DEBUG_ASSERT(lck != NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002578 if (__kmp_env_consistency_check) {
Jonathan Peytondae13d82015-12-11 21:57:06 +00002579 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002580 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00002581 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002582 }
2583
2584#else // KMP_USE_DYNAMIC_LOCK
2585
Jim Cownie5e8470a2013-09-27 10:38:44 +00002586 // We know that the fast reduction code is only emitted by Intel compilers
2587 // with 32 byte critical sections. If there isn't enough space, then we
2588 // have to use a pointer.
2589 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2590 lck = (kmp_user_lock_p)crit;
2591 }
2592 else {
2593 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2594 }
2595 KMP_DEBUG_ASSERT( lck != NULL );
2596
2597 if ( __kmp_env_consistency_check )
2598 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2599
2600 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002601
2602#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002603}
2604
2605// used in a critical section reduce block
2606static __forceinline void
2607__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2608
2609 kmp_user_lock_p lck;
2610
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002611#if KMP_USE_DYNAMIC_LOCK
2612
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002613 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002614 lck = (kmp_user_lock_p)crit;
2615 if (__kmp_env_consistency_check)
2616 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002617 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002618 } else {
2619 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2620 if (__kmp_env_consistency_check)
2621 __kmp_pop_sync(global_tid, ct_critical, loc);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00002622 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002623 }
2624
2625#else // KMP_USE_DYNAMIC_LOCK
2626
Jim Cownie5e8470a2013-09-27 10:38:44 +00002627 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2628 // sections. If there isn't enough space, then we have to use a pointer.
2629 if ( __kmp_base_user_lock_size > 32 ) {
2630 lck = *( (kmp_user_lock_p *) crit );
2631 KMP_ASSERT( lck != NULL );
2632 } else {
2633 lck = (kmp_user_lock_p) crit;
2634 }
2635
2636 if ( __kmp_env_consistency_check )
2637 __kmp_pop_sync( global_tid, ct_critical, loc );
2638
2639 __kmp_release_user_lock_with_checks( lck, global_tid );
2640
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002641#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002642} // __kmp_end_critical_section_reduce_block
2643
2644
2645/* 2.a.i. Reduce Block without a terminating barrier */
2646/*!
2647@ingroup SYNCHRONIZATION
2648@param loc source location information
2649@param global_tid global thread number
2650@param num_vars number of items (variables) to be reduced
2651@param reduce_size size of data in bytes to be reduced
2652@param reduce_data pointer to data to be reduced
2653@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2654@param lck pointer to the unique lock data structure
2655@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2656
2657The nowait version is used for a reduce clause with the nowait argument.
2658*/
2659kmp_int32
2660__kmpc_reduce_nowait(
2661 ident_t *loc, kmp_int32 global_tid,
2662 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2663 kmp_critical_name *lck ) {
2664
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002665 KMP_COUNT_BLOCK(REDUCE_nowait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002666 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002667 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002668#if OMP_40_ENABLED
2669 kmp_team_t *team;
2670 kmp_info_t *th;
2671 int teams_swapped = 0, task_state;
2672#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002673 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2674
2675 // why do we need this initialization here at all?
2676 // Reduction clause can not be used as a stand-alone directive.
2677
2678 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2679 // possible detection of false-positive race by the threadchecker ???
2680 if( ! TCR_4( __kmp_init_parallel ) )
2681 __kmp_parallel_initialize();
2682
2683 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002684#if KMP_USE_DYNAMIC_LOCK
2685 if ( __kmp_env_consistency_check )
2686 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2687#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002688 if ( __kmp_env_consistency_check )
2689 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002690#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002691
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002692#if OMP_40_ENABLED
2693 th = __kmp_thread_from_gtid(global_tid);
2694 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2695 team = th->th.th_team;
2696 if( team->t.t_level == th->th.th_teams_level ) {
2697 // this is reduction at teams construct
2698 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2699 // Let's swap teams temporarily for the reduction barrier
2700 teams_swapped = 1;
2701 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2702 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002703 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002704 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002705 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002706 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002707 }
2708 }
2709#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002710
    // The packed_reduction_method value will be reused by the __kmp_end_reduce* function, so the value should be kept in a variable.
    // The variable should be either a construct-specific or a thread-specific property, not a team-specific property
    // (a thread can reach the next reduce block on the next construct, and the reduce method may differ on the next construct).
    // An ident_t "loc" parameter could be used as a construct-specific property (but what if loc == 0?).
    // (If both construct-specific and team-specific variables were shared, then unnecessary extra synchronizations would be needed.)
    // A thread-specific variable is better regarding the two issues above (next construct and extra syncs).
    // A thread-specific "th_local.reduction_method" variable is used currently.
    // Each thread executes the 'determine' and 'set' lines (there is no need to restrict this to one thread, which avoids unnecessary extra syncs).
2719
2720 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2721 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2722
2723 if( packed_reduction_method == critical_reduce_block ) {
2724
2725 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2726 retval = 1;
2727
2728 } else if( packed_reduction_method == empty_reduce_block ) {
2729
2730 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2731 retval = 1;
2732
2733 } else if( packed_reduction_method == atomic_reduce_block ) {
2734
2735 retval = 2;
2736
        // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
        // (this is not ideal, because the checking block has been closed by this 'pop'
        // while the atomic operation has not been executed yet; it will run slightly later, literally on the next instruction)
2740 if ( __kmp_env_consistency_check )
2741 __kmp_pop_sync( global_tid, ct_reduce, loc );
2742
2743 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2744
2745 //AT: performance issue: a real barrier here
2746 //AT: (if master goes slow, other threads are blocked here waiting for the master to come and release them)
2747 //AT: (it's not what a customer might expect specifying NOWAIT clause)
2748 //AT: (specifying NOWAIT won't result in improvement of performance, it'll be confusing to a customer)
2749 //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
2750 // and be more in line with sense of NOWAIT
2751 //AT: TO DO: do epcc test and compare times
2752
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002753 // this barrier should be invisible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002754 // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002755#if USE_ITT_NOTIFY
2756 __kmp_threads[global_tid]->th.th_ident = loc;
2757#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002758 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2759 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2760
        // all workers except the master should do this pop here
        // (none of the other workers will reach __kmpc_end_reduce_nowait())
2763 if ( __kmp_env_consistency_check ) {
2764 if( retval == 0 ) {
2765 __kmp_pop_sync( global_tid, ct_reduce, loc );
2766 }
2767 }
2768
2769 } else {
2770
2771 // should never reach this block
2772 KMP_ASSERT( 0 ); // "unexpected method"
2773
2774 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002775#if OMP_40_ENABLED
2776 if( teams_swapped ) {
2777 // Restore thread structure
2778 th->th.th_info.ds.ds_tid = 0;
2779 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002780 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002781 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002782 th->th.th_task_state = task_state;
2783 }
2784#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002785 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2786
2787 return retval;
2788}
2789
2790/*!
2791@ingroup SYNCHRONIZATION
2792@param loc source location information
2793@param global_tid global thread id.
2794@param lck pointer to the unique lock data structure
2795
2796Finish the execution of a reduce nowait.
2797*/
2798void
2799__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2800
2801 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2802
2803 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2804
2805 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2806
2807 if( packed_reduction_method == critical_reduce_block ) {
2808
2809 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2810
2811 } else if( packed_reduction_method == empty_reduce_block ) {
2812
2813 // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2814
2815 } else if( packed_reduction_method == atomic_reduce_block ) {
2816
        // neither the master nor the other workers should get here
        // (code gen does not generate this call in case 2: atomic reduce block)
        // actually it would be better to remove this else-if entirely;
        // after removal the value would be caught by the 'else' branch and trigger the assert
2821
2822 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2823
2824 // only master gets here
2825
2826 } else {
2827
2828 // should never reach this block
2829 KMP_ASSERT( 0 ); // "unexpected method"
2830
2831 }
2832
2833 if ( __kmp_env_consistency_check )
2834 __kmp_pop_sync( global_tid, ct_reduce, loc );
2835
2836 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2837
2838 return;
2839}
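
/*
  Illustrative sketch (not part of the runtime): the calling pattern a compiler might
  generate for "#pragma omp parallel for reduction(+:sum)" when it targets the nowait
  entry points above. The names (example_loc, example_crit, __example_add_cb, sum,
  local_sum) are hypothetical; a real compiler typically passes a list of pointers to
  the private copies as reduce_data, but a single private int is used here for brevity.

      static ident_t example_loc = { ... };              // compiler-emitted source location
      static kmp_critical_name example_crit;             // zero-initialized lock storage
      static void __example_add_cb(void *lhs, void *rhs) {
          *(int *)lhs += *(int *)rhs;                    // combine two partial sums
      }
      ...
      int local_sum = 0;
      // each thread accumulates its partial result into local_sum, then:
      switch (__kmpc_reduce_nowait(&example_loc, gtid, 1, sizeof(int), &local_sum,
                                   __example_add_cb, &example_crit)) {
      case 1:                                            // this thread folds its data into the shared result
          sum += local_sum;
          __kmpc_end_reduce_nowait(&example_loc, gtid, &example_crit);
          break;
      case 2:                                            // atomic method: every thread updates the shared
          sum += local_sum;                              // variable atomically (a real compiler emits the
          break;                                         // corresponding __kmpc_atomic_* call here)
      default:                                           // 0: tree-reduce worker, nothing left to do
          break;
      }
*/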
2840
2841/* 2.a.ii. Reduce Block with a terminating barrier */
2842
2843/*!
2844@ingroup SYNCHRONIZATION
2845@param loc source location information
2846@param global_tid global thread number
2847@param num_vars number of items (variables) to be reduced
2848@param reduce_size size of data in bytes to be reduced
2849@param reduce_data pointer to data to be reduced
2850@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2851@param lck pointer to the unique lock data structure
2852@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2853
2854A blocking reduce that includes an implicit barrier.
2855*/
2856kmp_int32
2857__kmpc_reduce(
2858 ident_t *loc, kmp_int32 global_tid,
2859 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2860 void (*reduce_func)(void *lhs_data, void *rhs_data),
2861 kmp_critical_name *lck )
2862{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002863 KMP_COUNT_BLOCK(REDUCE_wait);
Jonathan Peyton5de1d472015-06-03 19:31:39 +00002864 int retval = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002865 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2866
2867 KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2868
    // why do we need this initialization here at all?
    // a reduction clause cannot be a stand-alone directive

    // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
    // possible detection of a false-positive race by the thread checker ???
2874 if( ! TCR_4( __kmp_init_parallel ) )
2875 __kmp_parallel_initialize();
2876
2877 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002878#if KMP_USE_DYNAMIC_LOCK
2879 if ( __kmp_env_consistency_check )
2880 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2881#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002882 if ( __kmp_env_consistency_check )
2883 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002884#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002885
Jim Cownie5e8470a2013-09-27 10:38:44 +00002886 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2887 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2888
2889 if( packed_reduction_method == critical_reduce_block ) {
2890
2891 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2892 retval = 1;
2893
2894 } else if( packed_reduction_method == empty_reduce_block ) {
2895
2896 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2897 retval = 1;
2898
2899 } else if( packed_reduction_method == atomic_reduce_block ) {
2900
2901 retval = 2;
2902
2903 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2904
2905 //case tree_reduce_block:
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002906 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002907 // (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002908#if USE_ITT_NOTIFY
2909 __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2910#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002911 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2912 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2913
        // all workers except the master should do this pop here
        // (none of the workers other than the master will enter __kmpc_end_reduce())
2916 if ( __kmp_env_consistency_check ) {
2917 if( retval == 0 ) { // 0: all other workers; 1: master
2918 __kmp_pop_sync( global_tid, ct_reduce, loc );
2919 }
2920 }
2921
2922 } else {
2923
2924 // should never reach this block
2925 KMP_ASSERT( 0 ); // "unexpected method"
2926
2927 }
2928
2929 KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2930
2931 return retval;
2932}
2933
2934/*!
2935@ingroup SYNCHRONIZATION
2936@param loc source location information
2937@param global_tid global thread id.
2938@param lck pointer to the unique lock data structure
2939
2940Finish the execution of a blocking reduce.
2941The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
2942*/
2943void
2944__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2945
2946 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2947
2948 KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2949
2950 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2951
Andrey Churbanov9f5a9b02015-08-05 12:00:07 +00002952 // this barrier should be visible to a customer and to the threading profile tool
Jim Cownie5e8470a2013-09-27 10:38:44 +00002953 // (it's a terminating barrier on constructs if NOWAIT not specified)
2954
2955 if( packed_reduction_method == critical_reduce_block ) {
2956
2957 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2958
2959 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002960#if USE_ITT_NOTIFY
2961 __kmp_threads[global_tid]->th.th_ident = loc;
2962#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002963 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2964
2965 } else if( packed_reduction_method == empty_reduce_block ) {
2966
2967 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2968
2969 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002970#if USE_ITT_NOTIFY
2971 __kmp_threads[global_tid]->th.th_ident = loc;
2972#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002973 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2974
2975 } else if( packed_reduction_method == atomic_reduce_block ) {
2976
2977 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002978#if USE_ITT_NOTIFY
2979 __kmp_threads[global_tid]->th.th_ident = loc;
2980#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002981 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2982
2983 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2984
2985 // only master executes here (master releases all other workers)
2986 __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2987
2988 } else {
2989
2990 // should never reach this block
2991 KMP_ASSERT( 0 ); // "unexpected method"
2992
2993 }
2994
2995 if ( __kmp_env_consistency_check )
2996 __kmp_pop_sync( global_tid, ct_reduce, loc );
2997
2998 KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2999
3000 return;
3001}
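
/*
  Illustrative sketch (not part of the runtime, names hypothetical): the blocking variant
  follows the same pattern as the nowait sketch above, with one difference consistent with
  the atomic_reduce_block branch of __kmpc_end_reduce() above: __kmpc_end_reduce() is also
  called on the atomic path, since it supplies the terminating barrier for every method.

      switch (__kmpc_reduce(&example_loc, gtid, 1, sizeof(int), &local_sum,
                            __example_add_cb, &example_crit)) {
      case 1:                                            // combine, then end (releases the critical
          sum += local_sum;                              // section and/or executes the barrier)
          __kmpc_end_reduce(&example_loc, gtid, &example_crit);
          break;
      case 2:                                            // atomic update (via a __kmpc_atomic_* call),
          sum += local_sum;                              // then end, which runs the terminating barrier
          __kmpc_end_reduce(&example_loc, gtid, &example_crit);
          break;
      default:                                           // 0: tree-reduce worker; released by the
          break;                                         // master's __kmpc_end_reduce()
      }
*/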
3002
3003#undef __KMP_GET_REDUCTION_METHOD
3004#undef __KMP_SET_REDUCTION_METHOD
3005
3006/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
3007
3008kmp_uint64
3009__kmpc_get_taskid() {
3010
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003011 kmp_int32 gtid;
3012 kmp_info_t * thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003013
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003014 gtid = __kmp_get_gtid();
3015 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003016 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003017 }; // if
3018 thread = __kmp_thread_from_gtid( gtid );
3019 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003020
3021} // __kmpc_get_taskid
3022
3023
3024kmp_uint64
3025__kmpc_get_parent_taskid() {
3026
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003027 kmp_int32 gtid;
3028 kmp_info_t * thread;
3029 kmp_taskdata_t * parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003030
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003031 gtid = __kmp_get_gtid();
3032 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003033 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003034 }; // if
3035 thread = __kmp_thread_from_gtid( gtid );
3036 parent_task = thread->th.th_current_task->td_parent;
3037 return ( parent_task == NULL ? 0 : parent_task->td_task_id );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003038
3039} // __kmpc_get_parent_taskid
3040
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003041void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003042{
Jim Cownie5e8470a2013-09-27 10:38:44 +00003043 if ( ! __kmp_init_serial ) {
3044 __kmp_serial_initialize();
3045 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003046 __kmp_place_num_sockets = nS;
3047 __kmp_place_socket_offset = sO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003048 __kmp_place_num_cores = nC;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003049 __kmp_place_core_offset = cO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003050 __kmp_place_num_threads_per_core = nT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003051}
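
// Illustrative sketch (hypothetical values, not part of the runtime): a compiler or tool
// could restrict the machine topology to 2 sockets, 4 cores per socket and 2 threads per
// core, with no socket/core offsets, by calling
//
//     __kmpc_place_threads(2 /*nS*/, 0 /*sO*/, 4 /*nC*/, 0 /*cO*/, 2 /*nT*/);
//
// before the first parallel region; the call simply stores the values into the
// __kmp_place_* globals assigned above.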
3052
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003053#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00003054/*!
3055@ingroup WORK_SHARING
3056@param loc source location information.
3057@param gtid global thread number.
3058@param num_dims number of associated doacross loops.
3059@param dims info on loops bounds.
3060
3061Initialize doacross loop information.
The compiler is expected to send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
3064*/
3065void
3066__kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, struct kmp_dim * dims)
3067{
3068 int j, idx;
3069 kmp_int64 last, trace_count;
3070 kmp_info_t *th = __kmp_threads[gtid];
3071 kmp_team_t *team = th->th.th_team;
3072 kmp_uint32 *flags;
3073 kmp_disp_t *pr_buf = th->th.th_dispatch;
3074 dispatch_shared_info_t *sh_buf;
3075
3076 KA_TRACE(20,("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3077 gtid, num_dims, !team->t.t_serialized));
3078 KMP_DEBUG_ASSERT(dims != NULL);
3079 KMP_DEBUG_ASSERT(num_dims > 0);
3080
3081 if( team->t.t_serialized ) {
3082 KA_TRACE(20,("__kmpc_doacross_init() exit: serialized team\n"));
3083 return; // no dependencies if team is serialized
3084 }
3085 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3086 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for the next loop
Jonathan Peyton067325f2016-05-31 19:01:15 +00003087 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003088
3089 // Save bounds info into allocated private buffer
3090 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3091 pr_buf->th_doacross_info =
3092 (kmp_int64*)__kmp_thread_malloc(th, sizeof(kmp_int64)*(4 * num_dims + 1));
3093 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3094 pr_buf->th_doacross_info[0] = (kmp_int64)num_dims; // first element is number of dimensions
    // Also save the address of num_done so it can be accessed later without knowing the buffer index
3096 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3097 pr_buf->th_doacross_info[2] = dims[0].lo;
3098 pr_buf->th_doacross_info[3] = dims[0].up;
3099 pr_buf->th_doacross_info[4] = dims[0].st;
3100 last = 5;
3101 for( j = 1; j < num_dims; ++j ) {
3102 kmp_int64 range_length; // To keep ranges of all dimensions but the first dims[0]
3103 if( dims[j].st == 1 ) { // most common case
            // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3105 range_length = dims[j].up - dims[j].lo + 1;
3106 } else {
3107 if( dims[j].st > 0 ) {
3108 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3109 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3110 } else { // negative increment
3111 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3112 range_length = (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3113 }
3114 }
3115 pr_buf->th_doacross_info[last++] = range_length;
3116 pr_buf->th_doacross_info[last++] = dims[j].lo;
3117 pr_buf->th_doacross_info[last++] = dims[j].up;
3118 pr_buf->th_doacross_info[last++] = dims[j].st;
3119 }
3120
3121 // Compute total trip count.
3122 // Start with range of dims[0] which we don't need to keep in the buffer.
3123 if( dims[0].st == 1 ) { // most common case
3124 trace_count = dims[0].up - dims[0].lo + 1;
3125 } else if( dims[0].st > 0 ) {
3126 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3127 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3128 } else { // negative increment
3129 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3130 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3131 }
3132 for( j = 1; j < num_dims; ++j ) {
3133 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3134 }
3135 KMP_DEBUG_ASSERT(trace_count > 0);
3136
    // Check that the shared buffer is not still occupied by another loop (i.e. by buffer index idx - __kmp_dispatch_num_buffers)
Jonathan Peyton71909c52016-03-02 22:42:06 +00003138 if( idx != sh_buf->doacross_buf_idx ) {
3139 // Shared buffer is occupied, wait for it to be free
3140 __kmp_wait_yield_4( (kmp_uint32*)&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, NULL );
3141 }
3142 // Check if we are the first thread. After the CAS the first thread gets 0,
3143 // others get 1 if initialization is in progress, allocated pointer otherwise.
3144 flags = (kmp_uint32*)KMP_COMPARE_AND_STORE_RET64(
3145 (kmp_int64*)&sh_buf->doacross_flags,NULL,(kmp_int64)1);
3146 if( flags == NULL ) {
3147 // we are the first thread, allocate the array of flags
3148 kmp_int64 size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3149 sh_buf->doacross_flags = (kmp_uint32*)__kmp_thread_calloc(th, size, 1);
3150 } else if( (kmp_int64)flags == 1 ) {
3151 // initialization is still in progress, need to wait
3152 while( (volatile kmp_int64)sh_buf->doacross_flags == 1 ) {
3153 KMP_YIELD(TRUE);
3154 }
3155 }
3156 KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > 1); // check value of pointer
    pr_buf->th_doacross_flags = sh_buf->doacross_flags; // save a private copy so that the shared
                                                        // buffer is not touched on each iteration
3159 KA_TRACE(20,("__kmpc_doacross_init() exit: T#%d\n", gtid));
3160}
3161
3162void
3163__kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec)
3164{
3165 kmp_int32 shft, num_dims, i;
3166 kmp_uint32 flag;
3167 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3168 kmp_info_t *th = __kmp_threads[gtid];
3169 kmp_team_t *team = th->th.th_team;
3170 kmp_disp_t *pr_buf;
3171 kmp_int64 lo, up, st;
3172
3173 KA_TRACE(20,("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3174 if( team->t.t_serialized ) {
3175 KA_TRACE(20,("__kmpc_doacross_wait() exit: serialized team\n"));
3176 return; // no dependencies if team is serialized
3177 }
3178
3179 // calculate sequential iteration number and check out-of-bounds condition
3180 pr_buf = th->th.th_dispatch;
3181 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3182 num_dims = pr_buf->th_doacross_info[0];
3183 lo = pr_buf->th_doacross_info[2];
3184 up = pr_buf->th_doacross_info[3];
3185 st = pr_buf->th_doacross_info[4];
3186 if( st == 1 ) { // most common case
3187 if( vec[0] < lo || vec[0] > up ) {
3188 KA_TRACE(20,(
3189 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3190 gtid, vec[0], lo, up));
3191 return;
3192 }
3193 iter_number = vec[0] - lo;
3194 } else if( st > 0 ) {
3195 if( vec[0] < lo || vec[0] > up ) {
3196 KA_TRACE(20,(
3197 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3198 gtid, vec[0], lo, up));
3199 return;
3200 }
3201 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3202 } else { // negative increment
3203 if( vec[0] > lo || vec[0] < up ) {
3204 KA_TRACE(20,(
3205 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3206 gtid, vec[0], lo, up));
3207 return;
3208 }
3209 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3210 }
3211 for( i = 1; i < num_dims; ++i ) {
3212 kmp_int64 iter, ln;
3213 kmp_int32 j = i * 4;
3214 ln = pr_buf->th_doacross_info[j + 1];
3215 lo = pr_buf->th_doacross_info[j + 2];
3216 up = pr_buf->th_doacross_info[j + 3];
3217 st = pr_buf->th_doacross_info[j + 4];
3218 if( st == 1 ) {
3219 if( vec[i] < lo || vec[i] > up ) {
3220 KA_TRACE(20,(
3221 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3222 gtid, vec[i], lo, up));
3223 return;
3224 }
3225 iter = vec[i] - lo;
3226 } else if( st > 0 ) {
3227 if( vec[i] < lo || vec[i] > up ) {
3228 KA_TRACE(20,(
3229 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3230 gtid, vec[i], lo, up));
3231 return;
3232 }
3233 iter = (kmp_uint64)(vec[i] - lo) / st;
3234 } else { // st < 0
3235 if( vec[i] > lo || vec[i] < up ) {
3236 KA_TRACE(20,(
3237 "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
3238 gtid, vec[i], lo, up));
3239 return;
3240 }
3241 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3242 }
3243 iter_number = iter + ln * iter_number;
3244 }
3245 shft = iter_number % 32; // use 32-bit granularity
3246 iter_number >>= 5; // divided by 32
3247 flag = 1 << shft;
3248 while( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 ) {
3249 KMP_YIELD(TRUE);
3250 }
3251 KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
3252 gtid, (iter_number<<5)+shft));
3253}
3254
3255void
3256__kmpc_doacross_post(ident_t *loc, int gtid, long long *vec)
3257{
3258 kmp_int32 shft, num_dims, i;
3259 kmp_uint32 flag;
3260 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3261 kmp_info_t *th = __kmp_threads[gtid];
3262 kmp_team_t *team = th->th.th_team;
3263 kmp_disp_t *pr_buf;
3264 kmp_int64 lo, st;
3265
3266 KA_TRACE(20,("__kmpc_doacross_post() enter: called T#%d\n", gtid));
3267 if( team->t.t_serialized ) {
3268 KA_TRACE(20,("__kmpc_doacross_post() exit: serialized team\n"));
3269 return; // no dependencies if team is serialized
3270 }
3271
3272 // calculate sequential iteration number (same as in "wait" but no out-of-bounds checks)
3273 pr_buf = th->th.th_dispatch;
3274 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3275 num_dims = pr_buf->th_doacross_info[0];
3276 lo = pr_buf->th_doacross_info[2];
3277 st = pr_buf->th_doacross_info[4];
3278 if( st == 1 ) { // most common case
3279 iter_number = vec[0] - lo;
3280 } else if( st > 0 ) {
3281 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3282 } else { // negative increment
3283 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3284 }
3285 for( i = 1; i < num_dims; ++i ) {
3286 kmp_int64 iter, ln;
3287 kmp_int32 j = i * 4;
3288 ln = pr_buf->th_doacross_info[j + 1];
3289 lo = pr_buf->th_doacross_info[j + 2];
3290 st = pr_buf->th_doacross_info[j + 4];
3291 if( st == 1 ) {
3292 iter = vec[i] - lo;
3293 } else if( st > 0 ) {
3294 iter = (kmp_uint64)(vec[i] - lo) / st;
3295 } else { // st < 0
3296 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3297 }
3298 iter_number = iter + ln * iter_number;
3299 }
3300 shft = iter_number % 32; // use 32-bit granularity
3301 iter_number >>= 5; // divided by 32
3302 flag = 1 << shft;
3303 if( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 )
3304 KMP_TEST_THEN_OR32( (kmp_int32*)&pr_buf->th_doacross_flags[iter_number], (kmp_int32)flag );
3305 KA_TRACE(20,("__kmpc_doacross_post() exit: T#%d iter %lld posted\n",
3306 gtid, (iter_number<<5)+shft));
3307}
3308
3309void
3310__kmpc_doacross_fini(ident_t *loc, int gtid)
3311{
3312 kmp_int64 num_done;
3313 kmp_info_t *th = __kmp_threads[gtid];
3314 kmp_team_t *team = th->th.th_team;
3315 kmp_disp_t *pr_buf = th->th.th_dispatch;
3316
3317 KA_TRACE(20,("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
3318 if( team->t.t_serialized ) {
3319 KA_TRACE(20,("__kmpc_doacross_fini() exit: serialized team %p\n", team));
3320 return; // nothing to do
3321 }
3322 num_done = KMP_TEST_THEN_INC64((kmp_int64*)pr_buf->th_doacross_info[1]) + 1;
3323 if( num_done == th->th.th_team_nproc ) {
3324 // we are the last thread, need to free shared resources
3325 int idx = pr_buf->th_doacross_buf_idx - 1;
Jonathan Peyton067325f2016-05-31 19:01:15 +00003326 dispatch_shared_info_t *sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003327 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done);
3328 KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done);
3329 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
3330 __kmp_thread_free(th, (void*)sh_buf->doacross_flags);
3331 sh_buf->doacross_flags = NULL;
3332 sh_buf->doacross_num_done = 0;
Jonathan Peyton067325f2016-05-31 19:01:15 +00003333 sh_buf->doacross_buf_idx += __kmp_dispatch_num_buffers; // free buffer for future re-use
Jonathan Peyton71909c52016-03-02 22:42:06 +00003334 }
3335 // free private resources (need to keep buffer index forever)
3336 __kmp_thread_free(th, (void*)pr_buf->th_doacross_info);
3337 pr_buf->th_doacross_info = NULL;
3338 KA_TRACE(20,("__kmpc_doacross_fini() exit: T#%d\n", gtid));
3339}
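
/*
  Illustrative sketch (not part of the runtime): the call sequence a compiler might emit
  for a one-dimensional doacross loop such as

      #pragma omp for ordered(1)
      for (i = 2; i < 9; i += 2) {
          #pragma omp ordered depend(sink: i - 2)
          ... use result of iteration i - 2 ...
          #pragma omp ordered depend(source)
      }

  assuming struct kmp_dim carries the inclusive lower bound, inclusive upper bound and
  stride in that order (cf. the doxygen note on __kmpc_doacross_init above); example_loc
  is a hypothetical compiler-emitted ident_t:

      struct kmp_dim dim = { 2, 8, 2 };                      // lo = 2, up = 8, st = 2
      __kmpc_doacross_init(&example_loc, gtid, 1, &dim);
      for (each iteration i assigned to this thread) {
          long long vec = i - 2;
          __kmpc_doacross_wait(&example_loc, gtid, &vec);    // block until iteration i-2 is posted
                                                             // (out-of-range sinks return immediately)
          ... loop body ...
          vec = i;
          __kmpc_doacross_post(&example_loc, gtid, &vec);    // mark iteration i as completed
      }
      __kmpc_doacross_fini(&example_loc, gtid);              // last thread frees the shared flag array
*/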
3340#endif
3341
Jim Cownie5e8470a2013-09-27 10:38:44 +00003342// end of file //
3343