/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This call is also optional, and even if called
 * it will not do anything unless the `KMP_IGNORE_MPPEND` environment variable
 * is set to zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime.

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
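  // A note on the format this parsing assumes: ident_t::psource is a
  // semicolon-separated string (see its documentation in kmp.h), roughly
  // ";file;routine;line;col;", so the file name lies between the first two
  // semicolons and the construct's line number follows the third.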
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}
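
// A hedged sketch (not code emitted by this file) of the call sequence a
// compiler might generate for "#pragma omp parallel num_threads(4)"; the
// names "outlined_fn" and "x" are hypothetical:
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &x);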

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
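
// A hedged sketch of the sequence a compiler might emit for
// "#pragma omp teams num_teams(2) thread_limit(4)"; "teams_outlined" is a
// hypothetical outlined routine, not something defined in this file:
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_teams(&loc, gtid, 2, 4);
//   __kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined);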

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
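
@code
// A sketch of the guard a compiler might generate for the region above;
// "outlined_fn" is a hypothetical outlined body with the usual
// (gtid, bound_tid, ...) microtask signature:
if (condition) {
  __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
} else {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  kmp_int32 zero = 0;
  __kmpc_serialized_parallel(&loc, gtid);
  outlined_fn(&gtid, &zero); // the encountering thread runs the body alone
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode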
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? omp_state_work_serial
                                           : omp_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
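
@code
// A sketch of the lowering: "#pragma omp flush" compiles to a single call;
// a flush list, if present, is not passed to this entry point.
__kmpc_flush(&loc);
@endcode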
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
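
@code
// A sketch of how a compiler might guard "#pragma omp master" with this
// pair (the body shown is hypothetical):
if (__kmpc_master(&loc, gtid)) {
  // ... code for the master thread only ...
  __kmpc_end_master(&loc, gtid);
}
@endcode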
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
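
@code
// A sketch of the calls a compiler might emit around an ordered region in a
// loop iteration (the body is hypothetical); the dispatch machinery makes
// iterations enter in sequence:
__kmpc_ordered(&loc, gtid);
// ... ordered region for this iteration ...
__kmpc_end_ordered(&loc, gtid);
@endcode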
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  omp_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = omp_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (omp_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = omp_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (                                                                  \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by user but rather setting
// name for object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
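
@code
// A sketch of the lowering of "#pragma omp critical": the compiler emits a
// zero-initialized kmp_critical_name and brackets the region with this call
// and __kmpc_end_critical (the names here are hypothetical):
static kmp_critical_name crit_lock = {0};
__kmpc_critical(&loc, gtid, &crit_lock);
// ... protected region ...
__kmpc_end_critical(&loc, gtid, &crit_lock);
@endcode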
1124*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001125void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1126 kmp_critical_name *crit) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001127#if KMP_USE_DYNAMIC_LOCK
Joachim Protze82e94a52017-11-01 10:08:30 +00001128#if OMPT_SUPPORT && OMPT_OPTIONAL
1129 OMPT_STORE_RETURN_ADDRESS(global_tid);
1130#endif // OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001131 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001132#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001133 KMP_COUNT_BLOCK(OMP_CRITICAL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001134#if OMPT_SUPPORT && OMPT_OPTIONAL
1135 omp_state_t prev_state = omp_state_undefined;
1136 ompt_thread_info_t ti;
1137#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001138 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001139
Jonathan Peyton30419822017-05-12 18:01:32 +00001140 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001141
Jonathan Peyton30419822017-05-12 18:01:32 +00001142 // TODO: add THR_OVHD_STATE
Jim Cownie5e8470a2013-09-27 10:38:44 +00001143
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001144 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
Jonathan Peyton30419822017-05-12 18:01:32 +00001145 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001146
Jonathan Peyton30419822017-05-12 18:01:32 +00001147 if ((__kmp_user_lock_kind == lk_tas) &&
1148 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1149 lck = (kmp_user_lock_p)crit;
1150 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001151#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001152 else if ((__kmp_user_lock_kind == lk_futex) &&
1153 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1154 lck = (kmp_user_lock_p)crit;
1155 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001156#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001157 else { // ticket, queuing or drdpa
1158 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1159 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001160
Jonathan Peyton30419822017-05-12 18:01:32 +00001161 if (__kmp_env_consistency_check)
1162 __kmp_push_sync(global_tid, ct_critical, loc, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001163
// Since the critical directive binds to all threads, not just the current
// team, we have to check this even if we are in a serialized team.
// Also, even if we are the uber thread, we still have to acquire the lock,
// as we have to contend with sibling threads.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001168
1169#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001170 __kmp_itt_critical_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001171#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001172#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
1174 void *codeptr_ra = NULL;
1175 if (ompt_enabled.enabled) {
1176 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1177 /* OMPT state update */
1178 prev_state = ti.state;
Joachim Protze40636132018-05-28 08:16:08 +00001179 ti.wait_id = (omp_wait_id_t)lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001180 ti.state = omp_state_wait_critical;
1181
1182 /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1184 if (ompt_enabled.ompt_callback_mutex_acquire) {
1185 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1186 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze40636132018-05-28 08:16:08 +00001187 (omp_wait_id_t)crit, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001188 }
1189 }
1190#endif
  // The value of 'crit' is suitable for use as a critical_id of the critical
  // section directive.
1193 __kmp_acquire_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001194
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001195#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001196 __kmp_itt_critical_acquired(lck);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001197#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001198#if OMPT_SUPPORT && OMPT_OPTIONAL
1199 if (ompt_enabled.enabled) {
1200 /* OMPT state update */
1201 ti.state = prev_state;
1202 ti.wait_id = 0;
1203
1204 /* OMPT event callback */
1205 if (ompt_enabled.ompt_callback_mutex_acquired) {
1206 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00001207 ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001208 }
1209 }
1210#endif
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001211 KMP_POP_PARTITIONED_TIMER();
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001212
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001213 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
Jonathan Peyton30419822017-05-12 18:01:32 +00001214 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001215#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001216}
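// Editor's note: a minimal sketch of the call sequence a compiler typically
// emits for "#pragma omp critical". The zero-initialized static
// kmp_critical_name and the function names are illustrative assumptions; loc
// and gtid are supplied by the compiler.
#if 0
static kmp_critical_name example_crit; // must start zeroed; the runtime
                                       // installs a lock pointer/tag into it
static void example_critical(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &example_crit); // blocks until entry is granted
  // ... user code protected by the critical construct ...
  __kmpc_end_critical(loc, gtid, &example_crit); // releases the lock
}
#endif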
1217
1218#if KMP_USE_DYNAMIC_LOCK
1219
1220// Converts the given hint to an internal lock implementation
Jonathan Peyton30419822017-05-12 18:01:32 +00001221static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001222#if KMP_USE_TSX
Jonathan Peyton30419822017-05-12 18:01:32 +00001223#define KMP_TSX_LOCK(seq) lockseq_##seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001224#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001225#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001226#endif
Hal Finkel01bb2402016-03-27 13:24:09 +00001227
1228#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00001229#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
Hal Finkel01bb2402016-03-27 13:24:09 +00001230#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001231#define KMP_CPUINFO_RTM 0
Hal Finkel01bb2402016-03-27 13:24:09 +00001232#endif
1233
Jonathan Peyton30419822017-05-12 18:01:32 +00001234 // Hints that do not require further logic
1235 if (hint & kmp_lock_hint_hle)
1236 return KMP_TSX_LOCK(hle);
1237 if (hint & kmp_lock_hint_rtm)
1238 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1239 if (hint & kmp_lock_hint_adaptive)
1240 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001241
Jonathan Peyton30419822017-05-12 18:01:32 +00001242 // Rule out conflicting hints first by returning the default lock
1243 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001244 return __kmp_user_lock_seq;
Jonathan Peyton30419822017-05-12 18:01:32 +00001245 if ((hint & omp_lock_hint_speculative) &&
1246 (hint & omp_lock_hint_nonspeculative))
1247 return __kmp_user_lock_seq;
1248
1249 // Do not even consider speculation when it appears to be contended
1250 if (hint & omp_lock_hint_contended)
1251 return lockseq_queuing;
1252
1253 // Uncontended lock without speculation
1254 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1255 return lockseq_tas;
1256
1257 // HLE lock for speculation
1258 if (hint & omp_lock_hint_speculative)
1259 return KMP_TSX_LOCK(hle);
1260
1261 return __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001262}
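// Worked examples of the mapping above (editor's note, derived from the rules
// as written; the TSX cases assume KMP_USE_TSX so that KMP_TSX_LOCK resolves
// to a speculative sequence):
//   omp_lock_hint_uncontended -> lockseq_tas
//   omp_lock_hint_contended -> lockseq_queuing
//   omp_lock_hint_speculative -> KMP_TSX_LOCK(hle)
//   omp_lock_hint_contended | omp_lock_hint_speculative -> lockseq_queuing
//     (the contended rule is checked before the speculative one)
//   omp_lock_hint_contended | omp_lock_hint_uncontended -> __kmp_user_lock_seq
//     (conflicting hints fall back to the default lock)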
1263
Joachim Protze82e94a52017-11-01 10:08:30 +00001264#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001265#if KMP_USE_DYNAMIC_LOCK
Joachim Protze1b2bd262018-01-17 10:06:01 +00001266static kmp_mutex_impl_t
Joachim Protze82e94a52017-11-01 10:08:30 +00001267__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1268 if (user_lock) {
1269 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1270 case 0:
1271 break;
1272#if KMP_USE_FUTEX
1273 case locktag_futex:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001274 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001275#endif
1276 case locktag_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001277 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001278#if KMP_USE_TSX
1279 case locktag_hle:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001280 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001281#endif
1282 default:
Joachim Protzee6269e32018-01-17 11:13:11 +00001283 return ompt_mutex_impl_unknown;
Joachim Protze82e94a52017-11-01 10:08:30 +00001284 }
1285 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1286 }
1287 KMP_ASSERT(ilock);
1288 switch (ilock->type) {
1289#if KMP_USE_TSX
1290 case locktag_adaptive:
1291 case locktag_rtm:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001292 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001293#endif
1294 case locktag_nested_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001295 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001296#if KMP_USE_FUTEX
1297 case locktag_nested_futex:
1298#endif
1299 case locktag_ticket:
1300 case locktag_queuing:
1301 case locktag_drdpa:
1302 case locktag_nested_ticket:
1303 case locktag_nested_queuing:
1304 case locktag_nested_drdpa:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001305 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001306 default:
Joachim Protzee6269e32018-01-17 11:13:11 +00001307 return ompt_mutex_impl_unknown;
Joachim Protze82e94a52017-11-01 10:08:30 +00001308 }
1309}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001310#else
Joachim Protze82e94a52017-11-01 10:08:30 +00001311// For locks without dynamic binding
Joachim Protze1b2bd262018-01-17 10:06:01 +00001312static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
Joachim Protze82e94a52017-11-01 10:08:30 +00001313 switch (__kmp_user_lock_kind) {
1314 case lk_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001315 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001316#if KMP_USE_FUTEX
1317 case lk_futex:
1318#endif
1319 case lk_ticket:
1320 case lk_queuing:
1321 case lk_drdpa:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001322 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001323#if KMP_USE_TSX
1324 case lk_hle:
1325 case lk_rtm:
1326 case lk_adaptive:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001327 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001328#endif
1329 default:
Joachim Protzee6269e32018-01-17 11:13:11 +00001330 return ompt_mutex_impl_unknown;
Joachim Protze82e94a52017-11-01 10:08:30 +00001331 }
1332}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001333#endif // KMP_USE_DYNAMIC_LOCK
1334#endif // OMPT_SUPPORT && OMPT_OPTIONAL
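// Editor's note: a sketch of a tool-side consumer of the values computed
// above. The parameter list mirrors the mutex-acquire invocations in this
// file; the exact ompt.h typedef is an assumption.
#if 0
static void example_on_mutex_acquire(ompt_mutex_kind_t kind, unsigned int hint,
                                     unsigned int impl, omp_wait_id_t wait_id,
                                     const void *codeptr_ra) {
  if (impl == kmp_mutex_impl_speculative) {
    // the runtime selected a TSX-based (HLE/RTM/adaptive) lock implementation
  }
}
#endif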
Joachim Protze82e94a52017-11-01 10:08:30 +00001335
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001336/*!
1337@ingroup WORK_SHARING
1338@param loc source location information.
1339@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001340@param crit identity of the critical section. This could be a pointer to a lock
1341associated with the critical section, or some other suitably unique value.
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001342@param hint the lock hint.
1343
Jonathan Peyton30419822017-05-12 18:01:32 +00001344Enter code protected by a `critical` construct with a hint. The hint value is
1345used to suggest a lock implementation. This function blocks until the executing
1346thread can enter the critical section unless the hint suggests use of
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001347speculative execution and the hardware supports it.
1348*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001349void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1350 kmp_critical_name *crit, uintptr_t hint) {
1351 KMP_COUNT_BLOCK(OMP_CRITICAL);
1352 kmp_user_lock_p lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001353#if OMPT_SUPPORT && OMPT_OPTIONAL
1354 omp_state_t prev_state = omp_state_undefined;
1355 ompt_thread_info_t ti;
  // This is the case if we were called from __kmpc_critical:
1357 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1358 if (!codeptr)
1359 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1360#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001361
Jonathan Peyton30419822017-05-12 18:01:32 +00001362 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001363
Jonathan Peyton30419822017-05-12 18:01:32 +00001364 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1365 // Check if it is initialized.
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001366 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
Jonathan Peyton30419822017-05-12 18:01:32 +00001367 if (*lk == 0) {
1368 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1369 if (KMP_IS_D_LOCK(lckseq)) {
1370 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1371 KMP_GET_D_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001372 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001373 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001374 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001375 }
  // Branch to access the actual lock object and its set operation. This
  // branching is unavoidable since this lock initialization does not follow
  // the normal dispatch path (the lock table is not used).
1379 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1380 lck = (kmp_user_lock_p)lk;
1381 if (__kmp_env_consistency_check) {
1382 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1383 __kmp_map_hint_to_lock(hint));
1384 }
1385#if USE_ITT_BUILD
1386 __kmp_itt_critical_acquiring(lck);
1387#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001388#if OMPT_SUPPORT && OMPT_OPTIONAL
1389 if (ompt_enabled.enabled) {
1390 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1391 /* OMPT state update */
1392 prev_state = ti.state;
Joachim Protze40636132018-05-28 08:16:08 +00001393 ti.wait_id = (omp_wait_id_t)lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001394 ti.state = omp_state_wait_critical;
1395
1396 /* OMPT event callback */
1397 if (ompt_enabled.ompt_callback_mutex_acquire) {
1398 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1399 ompt_mutex_critical, (unsigned int)hint,
Joachim Protze40636132018-05-28 08:16:08 +00001400 __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001401 }
1402 }
1403#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001404#if KMP_USE_INLINED_TAS
1405 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1406 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1407 } else
1408#elif KMP_USE_INLINED_FUTEX
1409 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1410 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1411 } else
1412#endif
1413 {
1414 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1415 }
1416 } else {
1417 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1418 lck = ilk->lock;
1419 if (__kmp_env_consistency_check) {
1420 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1421 __kmp_map_hint_to_lock(hint));
1422 }
1423#if USE_ITT_BUILD
1424 __kmp_itt_critical_acquiring(lck);
1425#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001426#if OMPT_SUPPORT && OMPT_OPTIONAL
1427 if (ompt_enabled.enabled) {
1428 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1429 /* OMPT state update */
1430 prev_state = ti.state;
Joachim Protze40636132018-05-28 08:16:08 +00001431 ti.wait_id = (omp_wait_id_t)lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001432 ti.state = omp_state_wait_critical;
1433
1434 /* OMPT event callback */
1435 if (ompt_enabled.ompt_callback_mutex_acquire) {
1436 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1437 ompt_mutex_critical, (unsigned int)hint,
Joachim Protze40636132018-05-28 08:16:08 +00001438 __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001439 }
1440 }
1441#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001442 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1443 }
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001444 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001445
Jim Cownie5e8470a2013-09-27 10:38:44 +00001446#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001447 __kmp_itt_critical_acquired(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001448#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001449#if OMPT_SUPPORT && OMPT_OPTIONAL
1450 if (ompt_enabled.enabled) {
1451 /* OMPT state update */
1452 ti.state = prev_state;
1453 ti.wait_id = 0;
1454
1455 /* OMPT event callback */
1456 if (ompt_enabled.ompt_callback_mutex_acquired) {
1457 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00001458 ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001459 }
1460 }
1461#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001462
Jonathan Peyton30419822017-05-12 18:01:32 +00001463 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1464 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001465} // __kmpc_critical_with_hint
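// Editor's note: a minimal sketch of the lowering for the OpenMP 4.5 form
//   #pragma omp critical (name) hint(omp_lock_hint_speculative)
// The names are illustrative; the exit path goes through the ordinary
// __kmpc_end_critical.
#if 0
static kmp_critical_name example_spec_crit;
static void example_critical_hint(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical_with_hint(loc, gtid, &example_spec_crit,
                            omp_lock_hint_speculative);
  // ... protected region; an HLE lock is chosen when TSX is available ...
  __kmpc_end_critical(loc, gtid, &example_spec_crit);
}
#endif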
1466
1467#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001468
1469/*!
1470@ingroup WORK_SHARING
1471@param loc source location information.
@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001473@param crit identity of the critical section. This could be a pointer to a lock
1474associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001475
1476Leave a critical section, releasing any lock that was held during its execution.
1477*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001478void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1479 kmp_critical_name *crit) {
1480 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001481
Jonathan Peyton30419822017-05-12 18:01:32 +00001482 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001483
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001484#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00001485 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1486 lck = (kmp_user_lock_p)crit;
1487 KMP_ASSERT(lck != NULL);
1488 if (__kmp_env_consistency_check) {
1489 __kmp_pop_sync(global_tid, ct_critical, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001490 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001491#if USE_ITT_BUILD
1492 __kmp_itt_critical_releasing(lck);
1493#endif
1494#if KMP_USE_INLINED_TAS
1495 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1496 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1497 } else
1498#elif KMP_USE_INLINED_FUTEX
1499 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1500 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1501 } else
1502#endif
1503 {
1504 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1505 }
1506 } else {
1507 kmp_indirect_lock_t *ilk =
1508 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1509 KMP_ASSERT(ilk != NULL);
1510 lck = ilk->lock;
1511 if (__kmp_env_consistency_check) {
1512 __kmp_pop_sync(global_tid, ct_critical, loc);
1513 }
1514#if USE_ITT_BUILD
1515 __kmp_itt_critical_releasing(lck);
1516#endif
1517 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1518 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001519
1520#else // KMP_USE_DYNAMIC_LOCK
1521
Jonathan Peyton30419822017-05-12 18:01:32 +00001522 if ((__kmp_user_lock_kind == lk_tas) &&
1523 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1524 lck = (kmp_user_lock_p)crit;
1525 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001526#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001527 else if ((__kmp_user_lock_kind == lk_futex) &&
1528 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1529 lck = (kmp_user_lock_p)crit;
1530 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001531#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001532 else { // ticket, queuing or drdpa
1533 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1534 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001535
Jonathan Peyton30419822017-05-12 18:01:32 +00001536 KMP_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001537
Jonathan Peyton30419822017-05-12 18:01:32 +00001538 if (__kmp_env_consistency_check)
1539 __kmp_pop_sync(global_tid, ct_critical, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001540
1541#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001542 __kmp_itt_critical_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001543#endif /* USE_ITT_BUILD */
  // The value of 'crit' is suitable for use as a critical_id of the critical
  // section directive.
1546 __kmp_release_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001547
Joachim Protze82e94a52017-11-01 10:08:30 +00001548#endif // KMP_USE_DYNAMIC_LOCK
1549
1550#if OMPT_SUPPORT && OMPT_OPTIONAL
1551 /* OMPT release event triggers after lock is released; place here to trigger
1552 * for all #if branches */
1553 OMPT_STORE_RETURN_ADDRESS(global_tid);
1554 if (ompt_enabled.ompt_callback_mutex_released) {
1555 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze40636132018-05-28 08:16:08 +00001556 ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001557 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001558#endif
1559
Jonathan Peyton30419822017-05-12 18:01:32 +00001560 KMP_POP_PARTITIONED_TIMER();
1561 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001562}
1563
1564/*!
1565@ingroup SYNCHRONIZATION
1566@param loc source location information
1567@param global_tid thread id.
1568@return one if the thread should execute the master block, zero otherwise
1569
Jonathan Peyton30419822017-05-12 18:01:32 +00001570Start execution of a combined barrier and master. The barrier is executed inside
1571this function.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001572*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001573kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1574 int status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001575
Jonathan Peyton30419822017-05-12 18:01:32 +00001576 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001577
Jonathan Peyton30419822017-05-12 18:01:32 +00001578 if (!TCR_4(__kmp_init_parallel))
1579 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001580
Jonathan Peyton30419822017-05-12 18:01:32 +00001581 if (__kmp_env_consistency_check)
1582 __kmp_check_barrier(global_tid, ct_barrier, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001583
Joachim Protze82e94a52017-11-01 10:08:30 +00001584#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00001585 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001586 if (ompt_enabled.enabled) {
1587 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00001588 if (ompt_frame->enter_frame == NULL)
1589 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001590 OMPT_STORE_RETURN_ADDRESS(global_tid);
1591 }
1592#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001593#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001594 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001595#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001596 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001597#if OMPT_SUPPORT && OMPT_OPTIONAL
1598 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00001599 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001600 }
1601#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001602
Jonathan Peyton30419822017-05-12 18:01:32 +00001603 return (status != 0) ? 0 : 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001604}
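// Editor's note: a sketch of how a compiler pairs this entry point with
// __kmpc_end_barrier_master (defined below); the names are illustrative.
#if 0
static void example_barrier_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_barrier_master(loc, gtid)) {
    // ... master-only code; the other threads wait at the barrier ...
    __kmpc_end_barrier_master(loc, gtid); // releases the waiting threads
  }
}
#endif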
1605
1606/*!
1607@ingroup SYNCHRONIZATION
1608@param loc source location information
1609@param global_tid thread id.
1610
1611Complete the execution of a combined barrier and master. This function should
1612only be called at the completion of the <tt>master</tt> code. Other threads will
1613still be waiting at the barrier and this call releases them.
1614*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001615void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1616 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001617
Jonathan Peyton30419822017-05-12 18:01:32 +00001618 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001619}
1620
1621/*!
1622@ingroup SYNCHRONIZATION
1623@param loc source location information
1624@param global_tid thread id.
1625@return one if the thread should execute the master block, zero otherwise
1626
1627Start execution of a combined barrier and master(nowait) construct.
1628The barrier is executed inside this function.
There is no equivalent "end" function: all other threads have already been
released by the barrier, and the actions of __kmpc_end_master are performed
inside this function.
1630*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001631kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1632 kmp_int32 ret;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001633
Jonathan Peyton30419822017-05-12 18:01:32 +00001634 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001635
Jonathan Peyton30419822017-05-12 18:01:32 +00001636 if (!TCR_4(__kmp_init_parallel))
1637 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001638
Jonathan Peyton30419822017-05-12 18:01:32 +00001639 if (__kmp_env_consistency_check) {
1640 if (loc == 0) {
1641 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
Jim Cownie5e8470a2013-09-27 10:38:44 +00001642 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001643 __kmp_check_barrier(global_tid, ct_barrier, loc);
1644 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645
Joachim Protze82e94a52017-11-01 10:08:30 +00001646#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00001647 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001648 if (ompt_enabled.enabled) {
1649 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00001650 if (ompt_frame->enter_frame == NULL)
1651 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001652 OMPT_STORE_RETURN_ADDRESS(global_tid);
1653 }
1654#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001655#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001656 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001657#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001658 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001659#if OMPT_SUPPORT && OMPT_OPTIONAL
1660 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00001661 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001662 }
1663#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001664
Jonathan Peyton30419822017-05-12 18:01:32 +00001665 ret = __kmpc_master(loc, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001666
Jonathan Peyton30419822017-05-12 18:01:32 +00001667 if (__kmp_env_consistency_check) {
1668 /* there's no __kmpc_end_master called; so the (stats) */
1669 /* actions of __kmpc_end_master are done here */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670
Jonathan Peyton30419822017-05-12 18:01:32 +00001671 if (global_tid < 0) {
1672 KMP_WARNING(ThreadIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001673 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001674 if (ret) {
1675 /* only one thread should do the pop since only */
1676 /* one did the push (see __kmpc_master()) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001677
Jonathan Peyton30419822017-05-12 18:01:32 +00001678 __kmp_pop_sync(global_tid, ct_master, loc);
1679 }
1680 }
1681
1682 return (ret);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001683}
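// Editor's note: a sketch of the nowait variant. No "end" call exists; the
// barrier has already released all threads before the master block runs.
#if 0
static void example_barrier_master_nowait(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_barrier_master_nowait(loc, gtid)) {
    // ... master-only code; no matching end call is required ...
  }
}
#endif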
1684
1685/* The BARRIER for a SINGLE process section is always explicit */
1686/*!
1687@ingroup WORK_SHARING
1688@param loc source location information
1689@param global_tid global thread number
1690@return One if this thread should execute the single construct, zero otherwise.
1691
1692Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if one is required.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695*/
1696
Jonathan Peyton30419822017-05-12 18:01:32 +00001697kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1698 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
Jonathan Peyton30138252016-03-03 21:21:05 +00001699
Jonathan Peyton30419822017-05-12 18:01:32 +00001700 if (rc) {
1701 // We are going to execute the single statement, so we should count it.
1702 KMP_COUNT_BLOCK(OMP_SINGLE);
1703 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1704 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001705
Joachim Protze82e94a52017-11-01 10:08:30 +00001706#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001707 kmp_info_t *this_thr = __kmp_threads[global_tid];
1708 kmp_team_t *team = this_thr->th.th_team;
1709 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001710
Joachim Protze82e94a52017-11-01 10:08:30 +00001711 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001712 if (rc) {
Joachim Protze82e94a52017-11-01 10:08:30 +00001713 if (ompt_enabled.ompt_callback_work) {
1714 ompt_callbacks.ompt_callback(ompt_callback_work)(
1715 ompt_work_single_executor, ompt_scope_begin,
1716 &(team->t.ompt_team_info.parallel_data),
1717 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1718 1, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001719 }
1720 } else {
Joachim Protze82e94a52017-11-01 10:08:30 +00001721 if (ompt_enabled.ompt_callback_work) {
1722 ompt_callbacks.ompt_callback(ompt_callback_work)(
1723 ompt_work_single_other, ompt_scope_begin,
1724 &(team->t.ompt_team_info.parallel_data),
1725 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1726 1, OMPT_GET_RETURN_ADDRESS(0));
1727 ompt_callbacks.ompt_callback(ompt_callback_work)(
1728 ompt_work_single_other, ompt_scope_end,
1729 &(team->t.ompt_team_info.parallel_data),
1730 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1731 1, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001732 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001733 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001734 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001735#endif
1736
Jonathan Peyton30419822017-05-12 18:01:32 +00001737 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001738}
1739
1740/*!
1741@ingroup WORK_SHARING
1742@param loc source location information
1743@param global_tid global thread number
1744
1745Mark the end of a <tt>single</tt> construct. This function should
1746only be called by the thread that executed the block of code protected
1747by the `single` construct.
1748*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001749void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1750 __kmp_exit_single(global_tid);
1751 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001752
Joachim Protze82e94a52017-11-01 10:08:30 +00001753#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001754 kmp_info_t *this_thr = __kmp_threads[global_tid];
1755 kmp_team_t *team = this_thr->th.th_team;
1756 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001757
Joachim Protze82e94a52017-11-01 10:08:30 +00001758 if (ompt_enabled.ompt_callback_work) {
1759 ompt_callbacks.ompt_callback(ompt_callback_work)(
1760 ompt_work_single_executor, ompt_scope_end,
1761 &(team->t.ompt_team_info.parallel_data),
1762 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1763 OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001764 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001765#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001766}
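// Editor's note: a sketch of a typical "#pragma omp single" lowering using
// the two entry points above. The trailing explicit barrier is emitted by the
// compiler (and omitted for nowait), as the __kmpc_single documentation
// notes; __kmpc_barrier is the usual runtime entry for it.
#if 0
static void example_single(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {
    // ... code executed by exactly one thread ...
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // omitted when the construct has nowait
}
#endif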
1767
1768/*!
1769@ingroup WORK_SHARING
1770@param loc Source location
1771@param global_tid Global thread id
1772
1773Mark the end of a statically scheduled loop.
1774*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001775void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001776 KMP_POP_PARTITIONED_TIMER();
Jonathan Peyton30419822017-05-12 18:01:32 +00001777 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001778
Joachim Protze82e94a52017-11-01 10:08:30 +00001779#if OMPT_SUPPORT && OMPT_OPTIONAL
1780 if (ompt_enabled.ompt_callback_work) {
Joachim Protze91732472017-11-10 21:07:01 +00001781 ompt_work_type_t ompt_work_type = ompt_work_loop;
Jonathan Peyton30419822017-05-12 18:01:32 +00001782 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001783 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1784 // Determine workshare type
1785 if (loc != NULL) {
1786 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1787 ompt_work_type = ompt_work_loop;
1788 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1789 ompt_work_type = ompt_work_sections;
1790 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1791 ompt_work_type = ompt_work_distribute;
1792 } else {
      // Use the default set above.
      // A warning about this case is provided in __kmpc_for_static_init.
Joachim Protze82e94a52017-11-01 10:08:30 +00001795 }
1796 KMP_DEBUG_ASSERT(ompt_work_type);
1797 }
1798 ompt_callbacks.ompt_callback(ompt_callback_work)(
1799 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1800 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001801 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001802#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001803 if (__kmp_env_consistency_check)
1804 __kmp_pop_workshare(global_tid, ct_pdo, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001805}
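// Editor's note: a sketch of a statically scheduled loop lowering. The
// companion entry point __kmpc_for_static_init_4 and its signature are
// assumptions based on the usual declarations for a signed 32-bit induction
// variable.
#if 0
static void example_static_loop(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 lower = 0, upper = 999, stride = 1, lastiter = 0;
  __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &lastiter, &lower,
                           &upper, &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i) {
    // ... loop body over this thread's chunk ...
  }
  __kmpc_for_static_fini(loc, gtid); // end-of-workshare bookkeeping above
}
#endif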
1806
// User routines that take C-style arguments (call by value), as opposed to
// the Fortran equivalents, which take their arguments by reference
Jim Cownie5e8470a2013-09-27 10:38:44 +00001809
Jonathan Peyton30419822017-05-12 18:01:32 +00001810void ompc_set_num_threads(int arg) {
1811 // !!!!! TODO: check the per-task binding
1812 __kmp_set_num_threads(arg, __kmp_entry_gtid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00001813}
1814
Jonathan Peyton30419822017-05-12 18:01:32 +00001815void ompc_set_dynamic(int flag) {
1816 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001817
Jonathan Peyton30419822017-05-12 18:01:32 +00001818 /* For the thread-private implementation of the internal controls */
1819 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001820
Jonathan Peyton30419822017-05-12 18:01:32 +00001821 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001822
Jonathan Peyton30419822017-05-12 18:01:32 +00001823 set__dynamic(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001824}
1825
Jonathan Peyton30419822017-05-12 18:01:32 +00001826void ompc_set_nested(int flag) {
1827 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001828
Jonathan Peyton30419822017-05-12 18:01:32 +00001829 /* For the thread-private internal controls implementation */
1830 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001831
Jonathan Peyton30419822017-05-12 18:01:32 +00001832 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001833
Jonathan Peyton30419822017-05-12 18:01:32 +00001834 set__nested(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001835}
1836
Jonathan Peyton30419822017-05-12 18:01:32 +00001837void ompc_set_max_active_levels(int max_active_levels) {
1838 /* TO DO */
1839 /* we want per-task implementation of this internal control */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001840
Jonathan Peyton30419822017-05-12 18:01:32 +00001841 /* For the per-thread internal controls implementation */
1842 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001843}
1844
Jonathan Peyton30419822017-05-12 18:01:32 +00001845void ompc_set_schedule(omp_sched_t kind, int modifier) {
1846 // !!!!! TODO: check the per-task binding
1847 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001848}
1849
Jonathan Peyton30419822017-05-12 18:01:32 +00001850int ompc_get_ancestor_thread_num(int level) {
1851 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001852}
1853
Jonathan Peyton30419822017-05-12 18:01:32 +00001854int ompc_get_team_size(int level) {
1855 return __kmp_get_team_size(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001856}
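// Editor's note: a minimal illustration of the call-by-value convention used
// by the routines above; the Fortran-style entry in the comment is a
// hypothetical name shown only for contrast.
#if 0
static void example_c_style(void) {
  ompc_set_num_threads(4); // C linkage: the value itself is passed
  int team_size = ompc_get_team_size(1); // the level is passed by value, too
  (void)team_size;
  // A Fortran entry would receive pointers instead, e.g.
  //   omp_set_num_threads_(&four);
}
#endif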
1857
Jonathan Peyton30419822017-05-12 18:01:32 +00001858void kmpc_set_stacksize(int arg) {
1859 // __kmp_aux_set_stacksize initializes the library if needed
1860 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001861}
1862
Jonathan Peyton30419822017-05-12 18:01:32 +00001863void kmpc_set_stacksize_s(size_t arg) {
1864 // __kmp_aux_set_stacksize initializes the library if needed
1865 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001866}
1867
Jonathan Peyton30419822017-05-12 18:01:32 +00001868void kmpc_set_blocktime(int arg) {
1869 int gtid, tid;
1870 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001871
Jonathan Peyton30419822017-05-12 18:01:32 +00001872 gtid = __kmp_entry_gtid();
1873 tid = __kmp_tid_from_gtid(gtid);
1874 thread = __kmp_thread_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001875
Jonathan Peyton30419822017-05-12 18:01:32 +00001876 __kmp_aux_set_blocktime(arg, thread, tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001877}
1878
Jonathan Peyton30419822017-05-12 18:01:32 +00001879void kmpc_set_library(int arg) {
1880 // __kmp_user_set_library initializes the library if needed
1881 __kmp_user_set_library((enum library_type)arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001882}
1883
Jonathan Peyton30419822017-05-12 18:01:32 +00001884void kmpc_set_defaults(char const *str) {
1885 // __kmp_aux_set_defaults initializes the library if needed
1886 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001887}
1888
Jonathan Peyton30419822017-05-12 18:01:32 +00001889void kmpc_set_disp_num_buffers(int arg) {
1890 // ignore after initialization because some teams have already
1891 // allocated dispatch buffers
1892 if (__kmp_init_serial == 0 && arg > 0)
1893 __kmp_dispatch_num_buffers = arg;
Jonathan Peyton067325f2016-05-31 19:01:15 +00001894}
1895
Jonathan Peyton30419822017-05-12 18:01:32 +00001896int kmpc_set_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001897#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001898 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001899#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001900 if (!TCR_4(__kmp_init_middle)) {
1901 __kmp_middle_initialize();
1902 }
1903 return __kmp_aux_set_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001904#endif
1905}
1906
Jonathan Peyton30419822017-05-12 18:01:32 +00001907int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001908#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001909 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001910#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001911 if (!TCR_4(__kmp_init_middle)) {
1912 __kmp_middle_initialize();
1913 }
1914 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001915#endif
1916}
1917
Jonathan Peyton30419822017-05-12 18:01:32 +00001918int kmpc_get_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001919#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001920 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001921#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001922 if (!TCR_4(__kmp_init_middle)) {
1923 __kmp_middle_initialize();
1924 }
1925 return __kmp_aux_get_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001926#endif
1927}
1928
Jim Cownie5e8470a2013-09-27 10:38:44 +00001929/* -------------------------------------------------------------------------- */
1930/*!
1931@ingroup THREADPRIVATE
1932@param loc source location information
1933@param gtid global thread number
1934@param cpy_size size of the cpy_data buffer
1935@param cpy_data pointer to data to be copied
1936@param cpy_func helper function to call for copying data
1937@param didit flag variable: 1=single thread; 0=not single thread
1938
Jonathan Peyton30419822017-05-12 18:01:32 +00001939__kmpc_copyprivate implements the interface for the private data broadcast
1940needed for the copyprivate clause associated with a single region in an
1941OpenMP<sup>*</sup> program (both C and Fortran).
Jim Cownie5e8470a2013-09-27 10:38:44 +00001942All threads participating in the parallel region call this routine.
Jonathan Peyton30419822017-05-12 18:01:32 +00001943One of the threads (called the single thread) should have the <tt>didit</tt>
1944variable set to 1 and all other threads should have that variable set to 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001945All threads pass a pointer to a data buffer (cpy_data) that they have built.
1946
Jonathan Peyton30419822017-05-12 18:01:32 +00001947The OpenMP specification forbids the use of nowait on the single region when a
1948copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
1949barrier internally to avoid race conditions, so the code generation for the
1950single region should avoid generating a barrier after the call to @ref
1951__kmpc_copyprivate.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001952
1953The <tt>gtid</tt> parameter is the global thread id for the current thread.
1954The <tt>loc</tt> parameter is a pointer to source location information.
1955
Jonathan Peyton30419822017-05-12 18:01:32 +00001956Internal implementation: The single thread will first copy its descriptor
1957address (cpy_data) to a team-private location, then the other threads will each
1958call the function pointed to by the parameter cpy_func, which carries out the
1959copy by copying the data using the cpy_data buffer.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001960
Jonathan Peyton30419822017-05-12 18:01:32 +00001961The cpy_func routine used for the copy and the contents of the data area defined
1962by cpy_data and cpy_size may be built in any fashion that will allow the copy
1963to be done. For instance, the cpy_data buffer can hold the actual data to be
1964copied or it may hold a list of pointers to the data. The cpy_func routine must
1965interpret the cpy_data buffer appropriately.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001966
1967The interface to cpy_func is as follows:
1968@code
1969void cpy_func( void *destination, void *source )
1970@endcode
1971where void *destination is the cpy_data pointer for the thread being copied to
1972and void *source is the cpy_data pointer for the thread being copied from.
1973*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001974void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
1975 void *cpy_data, void (*cpy_func)(void *, void *),
1976 kmp_int32 didit) {
1977 void **data_ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001978
Jonathan Peyton30419822017-05-12 18:01:32 +00001979 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001980
Jonathan Peyton30419822017-05-12 18:01:32 +00001981 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982
Jonathan Peyton30419822017-05-12 18:01:32 +00001983 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001984
Jonathan Peyton30419822017-05-12 18:01:32 +00001985 if (__kmp_env_consistency_check) {
1986 if (loc == 0) {
1987 KMP_WARNING(ConstructIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001989 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001990
  // TODO: Optimize the following two barriers into some kind of split barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992
Jonathan Peyton30419822017-05-12 18:01:32 +00001993 if (didit)
1994 *data_ptr = cpy_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995
Joachim Protze82e94a52017-11-01 10:08:30 +00001996#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00001997 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001998 if (ompt_enabled.enabled) {
1999 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00002000 if (ompt_frame->enter_frame == NULL)
2001 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00002002 OMPT_STORE_RETURN_ADDRESS(gtid);
2003 }
2004#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002005/* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002006#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002007 __kmp_threads[gtid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002008#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002009 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010
Jonathan Peyton30419822017-05-12 18:01:32 +00002011 if (!didit)
2012 (*cpy_func)(cpy_data, *data_ptr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002013
Jonathan Peyton30419822017-05-12 18:01:32 +00002014// Consider next barrier a user-visible barrier for barrier region boundaries
2015// Nesting checks are already handled by the single construct checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002016
Joachim Protze82e94a52017-11-01 10:08:30 +00002017#if OMPT_SUPPORT
2018 if (ompt_enabled.enabled) {
2019 OMPT_STORE_RETURN_ADDRESS(gtid);
2020 }
2021#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002022#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed
  // (e.g. tasks can overwrite the location)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002025#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002026 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00002027#if OMPT_SUPPORT && OMPT_OPTIONAL
2028 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00002029 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00002030 }
2031#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002032}
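// Editor's note: a sketch of the copyprivate protocol described above,
// broadcasting a single int; the helper and the buffer layout are
// illustrative.
#if 0
static void example_copy_func(void *dst, void *src) {
  *(int *)dst = *(int *)src; // copy from the single thread's buffer
}
static void example_copyprivate(ident_t *loc, kmp_int32 gtid) {
  int value = 0;
  kmp_int32 didit = __kmpc_single(loc, gtid);
  if (didit) {
    value = 42; // produced by the single thread
    __kmpc_end_single(loc, gtid);
  }
  // Every thread passes its own buffer; the single thread's address is
  // published and the others copy from it via the helper. No barrier should
  // be generated after this call (the barriers are performed internally).
  __kmpc_copyprivate(loc, gtid, sizeof(int), &value, example_copy_func, didit);
}
#endif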
2033
2034/* -------------------------------------------------------------------------- */
2035
Jonathan Peyton30419822017-05-12 18:01:32 +00002036#define INIT_LOCK __kmp_init_user_lock_with_checks
2037#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2038#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2039#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2040#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2041#define ACQUIRE_NESTED_LOCK_TIMED \
2042 __kmp_acquire_nested_user_lock_with_checks_timed
2043#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2044#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2045#define TEST_LOCK __kmp_test_user_lock_with_checks
2046#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2047#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2048#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002049
Jonathan Peyton30419822017-05-12 18:01:32 +00002050// TODO: Make check abort messages use location info & pass it into
2051// with_checks routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00002052
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002053#if KMP_USE_DYNAMIC_LOCK
2054
2055// internal lock initializer
Jonathan Peyton30419822017-05-12 18:01:32 +00002056static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2057 kmp_dyna_lockseq_t seq) {
2058 if (KMP_IS_D_LOCK(seq)) {
2059 KMP_INIT_D_LOCK(lock, seq);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002060#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002061 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002062#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002063 } else {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002064 KMP_INIT_I_LOCK(lock, seq);
2065#if USE_ITT_BUILD
2066 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2067 __kmp_itt_lock_creating(ilk->lock, loc);
2068#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002069 }
2070}
2071
2072// internal nest lock initializer
2073static __forceinline void
2074__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2075 kmp_dyna_lockseq_t seq) {
2076#if KMP_USE_TSX
2077 // Don't have nested lock implementation for speculative locks
2078 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2079 seq = __kmp_user_lock_seq;
2080#endif
2081 switch (seq) {
2082 case lockseq_tas:
2083 seq = lockseq_nested_tas;
2084 break;
2085#if KMP_USE_FUTEX
2086 case lockseq_futex:
2087 seq = lockseq_nested_futex;
2088 break;
2089#endif
2090 case lockseq_ticket:
2091 seq = lockseq_nested_ticket;
2092 break;
2093 case lockseq_queuing:
2094 seq = lockseq_nested_queuing;
2095 break;
2096 case lockseq_drdpa:
2097 seq = lockseq_nested_drdpa;
2098 break;
2099 default:
2100 seq = lockseq_nested_queuing;
2101 }
2102 KMP_INIT_I_LOCK(lock, seq);
2103#if USE_ITT_BUILD
2104 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2105 __kmp_itt_lock_creating(ilk->lock, loc);
2106#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002107}
2108
2109/* initialize the lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00002110void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2111 uintptr_t hint) {
2112 KMP_DEBUG_ASSERT(__kmp_init_serial);
2113 if (__kmp_env_consistency_check && user_lock == NULL) {
2114 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2115 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002116
Jonathan Peyton30419822017-05-12 18:01:32 +00002117 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Joachim Protze82e94a52017-11-01 10:08:30 +00002118
2119#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if we were called from omp_init_lock_with_hint:
2121 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2122 if (!codeptr)
2123 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2124 if (ompt_enabled.ompt_callback_lock_init) {
2125 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2126 ompt_mutex_lock, (omp_lock_hint_t)hint,
Joachim Protze40636132018-05-28 08:16:08 +00002127 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002128 codeptr);
2129 }
2130#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002131}
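// Editor's note: the user-level view. The OpenMP 4.5 routine
// omp_init_lock_with_hint reaches this entry point; a sketch with an
// illustrative hint follows (omp.h is already included by this file).
#if 0
static void example_lock_hint(void) {
  omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_contended); // -> lockseq_queuing
  omp_set_lock(&l);
  // ... contended critical work ...
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
}
#endif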
2132
2133/* initialize the lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00002134void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2135 void **user_lock, uintptr_t hint) {
2136 KMP_DEBUG_ASSERT(__kmp_init_serial);
2137 if (__kmp_env_consistency_check && user_lock == NULL) {
2138 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2139 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002140
Jonathan Peyton30419822017-05-12 18:01:32 +00002141 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Joachim Protze82e94a52017-11-01 10:08:30 +00002142
2143#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if we were called from omp_init_nest_lock_with_hint:
2145 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2146 if (!codeptr)
2147 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2148 if (ompt_enabled.ompt_callback_lock_init) {
2149 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2150 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
Joachim Protze40636132018-05-28 08:16:08 +00002151 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002152 codeptr);
2153 }
2154#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002155}
2156
2157#endif // KMP_USE_DYNAMIC_LOCK
2158
Jim Cownie5e8470a2013-09-27 10:38:44 +00002159/* initialize the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002160void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002161#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00002162
2163 KMP_DEBUG_ASSERT(__kmp_init_serial);
2164 if (__kmp_env_consistency_check && user_lock == NULL) {
2165 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2166 }
2167 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002168
Joachim Protze82e94a52017-11-01 10:08:30 +00002169#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if we were called from omp_init_lock_with_hint:
2171 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2172 if (!codeptr)
2173 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2174 if (ompt_enabled.ompt_callback_lock_init) {
2175 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2176 ompt_mutex_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00002177 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002178 codeptr);
2179 }
2180#endif
2181
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002182#else // KMP_USE_DYNAMIC_LOCK
2183
Jonathan Peyton30419822017-05-12 18:01:32 +00002184 static char const *const func = "omp_init_lock";
2185 kmp_user_lock_p lck;
2186 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002187
Jonathan Peyton30419822017-05-12 18:01:32 +00002188 if (__kmp_env_consistency_check) {
2189 if (user_lock == NULL) {
2190 KMP_FATAL(LockIsUninitialized, func);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002191 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002192 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002193
Jonathan Peyton30419822017-05-12 18:01:32 +00002194 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002195
Jonathan Peyton30419822017-05-12 18:01:32 +00002196 if ((__kmp_user_lock_kind == lk_tas) &&
2197 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2198 lck = (kmp_user_lock_p)user_lock;
2199 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002200#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002201 else if ((__kmp_user_lock_kind == lk_futex) &&
2202 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2203 lck = (kmp_user_lock_p)user_lock;
2204 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002205#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002206 else {
2207 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2208 }
2209 INIT_LOCK(lck);
2210 __kmp_set_user_lock_location(lck, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002211
Joachim Protze82e94a52017-11-01 10:08:30 +00002212#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if we were called from omp_init_lock_with_hint:
2214 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2215 if (!codeptr)
2216 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2217 if (ompt_enabled.ompt_callback_lock_init) {
2218 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2219 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze40636132018-05-28 08:16:08 +00002220 (omp_wait_id_t)user_lock, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002221 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002222#endif
2223
Jim Cownie5e8470a2013-09-27 10:38:44 +00002224#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002225 __kmp_itt_lock_creating(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002226#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002227
2228#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002229} // __kmpc_init_lock
2230
2231/* initialize the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002232void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002233#if KMP_USE_DYNAMIC_LOCK
2234
Jonathan Peyton30419822017-05-12 18:01:32 +00002235 KMP_DEBUG_ASSERT(__kmp_init_serial);
2236 if (__kmp_env_consistency_check && user_lock == NULL) {
2237 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2238 }
2239 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002240
Joachim Protze82e94a52017-11-01 10:08:30 +00002241#if OMPT_SUPPORT && OMPT_OPTIONAL
2242 // This is the case if called from omp_init_lock_with_hint:
2243 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2244 if (!codeptr)
2245 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2246 if (ompt_enabled.ompt_callback_lock_init) {
2247 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2248 ompt_mutex_nest_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00002249 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002250 codeptr);
2251 }
2252#endif
2253
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002254#else // KMP_USE_DYNAMIC_LOCK
2255
Jonathan Peyton30419822017-05-12 18:01:32 +00002256 static char const *const func = "omp_init_nest_lock";
2257 kmp_user_lock_p lck;
2258 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002259
Jonathan Peyton30419822017-05-12 18:01:32 +00002260 if (__kmp_env_consistency_check) {
2261 if (user_lock == NULL) {
2262 KMP_FATAL(LockIsUninitialized, func);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002263 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002264 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002265
Jonathan Peyton30419822017-05-12 18:01:32 +00002266 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002267
Jonathan Peyton30419822017-05-12 18:01:32 +00002268 if ((__kmp_user_lock_kind == lk_tas) &&
2269 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2270 OMP_NEST_LOCK_T_SIZE)) {
2271 lck = (kmp_user_lock_p)user_lock;
2272 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002273#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002274 else if ((__kmp_user_lock_kind == lk_futex) &&
2275 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2276 OMP_NEST_LOCK_T_SIZE)) {
2277 lck = (kmp_user_lock_p)user_lock;
2278 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002279#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002280 else {
2281 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2282 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002283
Jonathan Peyton30419822017-05-12 18:01:32 +00002284 INIT_NESTED_LOCK(lck);
2285 __kmp_set_user_lock_location(lck, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002286
Joachim Protze82e94a52017-11-01 10:08:30 +00002287#if OMPT_SUPPORT && OMPT_OPTIONAL
2288 // This is the case, if called from omp_init_lock_with_hint:
2289 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2290 if (!codeptr)
2291 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2292 if (ompt_enabled.ompt_callback_lock_init) {
2293 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2294 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze40636132018-05-28 08:16:08 +00002295 (omp_wait_id_t)user_lock, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002296 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002297#endif
2298
Jim Cownie5e8470a2013-09-27 10:38:44 +00002299#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002300 __kmp_itt_lock_creating(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002301#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002302
2303#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002304} // __kmpc_init_nest_lock
2305
Jonathan Peyton30419822017-05-12 18:01:32 +00002306void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002307#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002308
Jonathan Peyton30419822017-05-12 18:01:32 +00002309#if USE_ITT_BUILD
2310 kmp_user_lock_p lck;
2311 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2312 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2313 } else {
2314 lck = (kmp_user_lock_p)user_lock;
2315 }
2316 __kmp_itt_lock_destroyed(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002317#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002318#if OMPT_SUPPORT && OMPT_OPTIONAL
2319 // This is the case if called from omp_init_lock_with_hint:
2320 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2321 if (!codeptr)
2322 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2323 if (ompt_enabled.ompt_callback_lock_destroy) {
2324 kmp_user_lock_p lck;
2325 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2326 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2327 } else {
2328 lck = (kmp_user_lock_p)user_lock;
2329 }
2330 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
Joachim Protze40636132018-05-28 08:16:08 +00002331 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002332 }
2333#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002334 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2335#else
2336 kmp_user_lock_p lck;
2337
2338 if ((__kmp_user_lock_kind == lk_tas) &&
2339 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2340 lck = (kmp_user_lock_p)user_lock;
2341 }
2342#if KMP_USE_FUTEX
2343 else if ((__kmp_user_lock_kind == lk_futex) &&
2344 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2345 lck = (kmp_user_lock_p)user_lock;
2346 }
2347#endif
2348 else {
2349 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2350 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002351
Joachim Protze82e94a52017-11-01 10:08:30 +00002352#if OMPT_SUPPORT && OMPT_OPTIONAL
2353 // This is the case if called from omp_init_lock_with_hint:
2354 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2355 if (!codeptr)
2356 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2357 if (ompt_enabled.ompt_callback_lock_destroy) {
2358 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
Joachim Protze40636132018-05-28 08:16:08 +00002359 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002360 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002361#endif
2362
Jim Cownie5e8470a2013-09-27 10:38:44 +00002363#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002364 __kmp_itt_lock_destroyed(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002365#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00002366 DESTROY_LOCK(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002367
Jonathan Peyton30419822017-05-12 18:01:32 +00002368 if ((__kmp_user_lock_kind == lk_tas) &&
2369 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2370 ;
2371 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002372#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002373 else if ((__kmp_user_lock_kind == lk_futex) &&
2374 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2375 ;
2376 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002378 else {
2379 __kmp_user_lock_free(user_lock, gtid, lck);
2380 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002381#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002382} // __kmpc_destroy_lock
2383
2384/* destroy the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002385void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002386#if KMP_USE_DYNAMIC_LOCK
2387
Jonathan Peyton30419822017-05-12 18:01:32 +00002388#if USE_ITT_BUILD
2389 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2390 __kmp_itt_lock_destroyed(ilk->lock);
2391#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002392#if OMPT_SUPPORT && OMPT_OPTIONAL
2393 // This is the case if called from omp_init_lock_with_hint:
2394 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2395 if (!codeptr)
2396 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2397 if (ompt_enabled.ompt_callback_lock_destroy) {
2398 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
Joachim Protze40636132018-05-28 08:16:08 +00002399 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002400 }
2401#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002402 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002403
2404#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002405
Jonathan Peyton30419822017-05-12 18:01:32 +00002406 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002407
Jonathan Peyton30419822017-05-12 18:01:32 +00002408 if ((__kmp_user_lock_kind == lk_tas) &&
2409 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2410 OMP_NEST_LOCK_T_SIZE)) {
2411 lck = (kmp_user_lock_p)user_lock;
2412 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002413#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002414 else if ((__kmp_user_lock_kind == lk_futex) &&
2415 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2416 OMP_NEST_LOCK_T_SIZE)) {
2417 lck = (kmp_user_lock_p)user_lock;
2418 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002419#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002420 else {
2421 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2422 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002423
Joachim Protze82e94a52017-11-01 10:08:30 +00002424#if OMPT_SUPPORT && OMPT_OPTIONAL
2425 // This is the case if called from omp_init_lock_with_hint:
2426 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2427 if (!codeptr)
2428 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2429 if (ompt_enabled.ompt_callback_lock_destroy) {
2430 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
Joachim Protze40636132018-05-28 08:16:08 +00002431 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002432 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002433#endif
2434
Jim Cownie5e8470a2013-09-27 10:38:44 +00002435#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002436 __kmp_itt_lock_destroyed(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002437#endif /* USE_ITT_BUILD */
2438
Jonathan Peyton30419822017-05-12 18:01:32 +00002439 DESTROY_NESTED_LOCK(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002440
Jonathan Peyton30419822017-05-12 18:01:32 +00002441 if ((__kmp_user_lock_kind == lk_tas) &&
2442 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2443 OMP_NEST_LOCK_T_SIZE)) {
2444 ;
2445 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002446#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002447 else if ((__kmp_user_lock_kind == lk_futex) &&
2448 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2449 OMP_NEST_LOCK_T_SIZE)) {
2450 ;
2451 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002452#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002453 else {
2454 __kmp_user_lock_free(user_lock, gtid, lck);
2455 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002456#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002457} // __kmpc_destroy_nest_lock
2458
Jonathan Peyton30419822017-05-12 18:01:32 +00002459void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2460 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002461#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00002462 int tag = KMP_EXTRACT_D_TAG(user_lock);
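  // Hedged note on the dispatch below (inferred from this file): for direct
  // dynamic locks the low bits of the lock word carry a nonzero tag naming
  // the lock kind, so the acquire is routed through __kmp_direct_set[tag]
  // (or inlined for TAS/futex); a tag of 0 denotes an indirect lock reached
  // through a lookup table.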
2463#if USE_ITT_BUILD
2464 __kmp_itt_lock_acquiring(
2465 (kmp_user_lock_p)
2466 user_lock); // the ITT function resolves the actual lock object.
2467#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002468#if OMPT_SUPPORT && OMPT_OPTIONAL
2469 // This is the case if called from omp_init_lock_with_hint:
2470 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2471 if (!codeptr)
2472 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2473 if (ompt_enabled.ompt_callback_mutex_acquire) {
2474 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2475 ompt_mutex_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00002476 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002477 codeptr);
2478 }
2479#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002480#if KMP_USE_INLINED_TAS
2481 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2482 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2483 } else
2484#elif KMP_USE_INLINED_FUTEX
2485 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2486 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2487 } else
2488#endif
2489 {
2490 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2491 }
2492#if USE_ITT_BUILD
2493 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2494#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002495#if OMPT_SUPPORT && OMPT_OPTIONAL
2496 if (ompt_enabled.ompt_callback_mutex_acquired) {
2497 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00002498 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002499 }
2500#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002501
2502#else // KMP_USE_DYNAMIC_LOCK
2503
Jonathan Peyton30419822017-05-12 18:01:32 +00002504 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002505
Jonathan Peyton30419822017-05-12 18:01:32 +00002506 if ((__kmp_user_lock_kind == lk_tas) &&
2507 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2508 lck = (kmp_user_lock_p)user_lock;
2509 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002510#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002511 else if ((__kmp_user_lock_kind == lk_futex) &&
2512 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2513 lck = (kmp_user_lock_p)user_lock;
2514 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002515#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002516 else {
2517 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2518 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002519
2520#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002521 __kmp_itt_lock_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002522#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00002523#if OMPT_SUPPORT && OMPT_OPTIONAL
2524 // This is the case if called from omp_init_lock_with_hint:
2525 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2526 if (!codeptr)
2527 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2528 if (ompt_enabled.ompt_callback_mutex_acquire) {
2529 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2530 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze40636132018-05-28 08:16:08 +00002531 (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002532 }
2533#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002534
Jonathan Peyton30419822017-05-12 18:01:32 +00002535 ACQUIRE_LOCK(lck, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002536
2537#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002538 __kmp_itt_lock_acquired(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002539#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002540
Joachim Protze82e94a52017-11-01 10:08:30 +00002541#if OMPT_SUPPORT && OMPT_OPTIONAL
2542 if (ompt_enabled.ompt_callback_mutex_acquired) {
2543 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00002544 ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002545 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002546#endif
2547
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002548#endif // KMP_USE_DYNAMIC_LOCK
2549}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002550
Jonathan Peyton30419822017-05-12 18:01:32 +00002551void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002552#if KMP_USE_DYNAMIC_LOCK
2553
Jonathan Peyton30419822017-05-12 18:01:32 +00002554#if USE_ITT_BUILD
2555 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2556#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002557#if OMPT_SUPPORT && OMPT_OPTIONAL
2558 // This is the case if called from omp_init_lock_with_hint:
2559 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2560 if (!codeptr)
2561 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2562 if (ompt_enabled.enabled) {
2563 if (ompt_enabled.ompt_callback_mutex_acquire) {
2564 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2565 ompt_mutex_nest_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00002566 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002567 codeptr);
2568 }
2569 }
2570#endif
2571 int acquire_status =
2572 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
Gheorghe-Teodor Bercea15f54072018-08-27 19:54:26 +00002573 (void) acquire_status;
Jonathan Peyton30419822017-05-12 18:01:32 +00002574#if USE_ITT_BUILD
2575 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002576#endif
2577
Joachim Protze82e94a52017-11-01 10:08:30 +00002578#if OMPT_SUPPORT && OMPT_OPTIONAL
2579 if (ompt_enabled.enabled) {
2580 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2581 if (ompt_enabled.ompt_callback_mutex_acquired) {
2582 // lock_first
2583 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00002584 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002585 }
2586 } else {
2587 if (ompt_enabled.ompt_callback_nest_lock) {
2588 // lock_next
2589 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze40636132018-05-28 08:16:08 +00002590 ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002591 }
2592 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002593 }
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002594#endif
2595
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002596#else // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00002597 int acquire_status;
2598 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002599
Jonathan Peyton30419822017-05-12 18:01:32 +00002600 if ((__kmp_user_lock_kind == lk_tas) &&
2601 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2602 OMP_NEST_LOCK_T_SIZE)) {
2603 lck = (kmp_user_lock_p)user_lock;
2604 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002605#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002606 else if ((__kmp_user_lock_kind == lk_futex) &&
2607 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2608 OMP_NEST_LOCK_T_SIZE)) {
2609 lck = (kmp_user_lock_p)user_lock;
2610 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002611#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002612 else {
2613 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2614 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002615
2616#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002617 __kmp_itt_lock_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00002619#if OMPT_SUPPORT && OMPT_OPTIONAL
2620 // This is the case if called from omp_init_lock_with_hint:
2621 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2622 if (!codeptr)
2623 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2624 if (ompt_enabled.enabled) {
2625 if (ompt_enabled.ompt_callback_mutex_acquire) {
2626 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2627 ompt_mutex_nest_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00002628 __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002629 }
2630 }
2631#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002632
Jonathan Peyton30419822017-05-12 18:01:32 +00002633 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002634
2635#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002636 __kmp_itt_lock_acquired(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637#endif /* USE_ITT_BUILD */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002638
Joachim Protze82e94a52017-11-01 10:08:30 +00002639#if OMPT_SUPPORT && OMPT_OPTIONAL
2640 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002641 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
Joachim Protze82e94a52017-11-01 10:08:30 +00002642 if (ompt_enabled.ompt_callback_mutex_acquired) {
2643 // lock_first
2644 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00002645 ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002646 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002647 } else {
Joachim Protze82e94a52017-11-01 10:08:30 +00002648 if (ompt_enabled.ompt_callback_nest_lock) {
2649 // lock_next
2650 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze40636132018-05-28 08:16:08 +00002651 ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002652 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002653 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002654 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002655#endif
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002656
2657#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002658}
2659
Jonathan Peyton30419822017-05-12 18:01:32 +00002660void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002661#if KMP_USE_DYNAMIC_LOCK
2662
Jonathan Peyton30419822017-05-12 18:01:32 +00002663 int tag = KMP_EXTRACT_D_TAG(user_lock);
2664#if USE_ITT_BUILD
2665 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2666#endif
2667#if KMP_USE_INLINED_TAS
2668 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2669 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2670 } else
2671#elif KMP_USE_INLINED_FUTEX
2672 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2673 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2674 } else
2675#endif
2676 {
2677 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2678 }
2679
Joachim Protze82e94a52017-11-01 10:08:30 +00002680#if OMPT_SUPPORT && OMPT_OPTIONAL
2681 // This is the case if called from omp_init_lock_with_hint:
2682 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2683 if (!codeptr)
2684 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2685 if (ompt_enabled.ompt_callback_mutex_released) {
2686 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze40636132018-05-28 08:16:08 +00002687 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002688 }
2689#endif
2690
Jonathan Peyton30419822017-05-12 18:01:32 +00002691#else // KMP_USE_DYNAMIC_LOCK
2692
2693 kmp_user_lock_p lck;
2694
2695 /* Can't use serial interval since not block structured */
2696 /* release the lock */
2697
2698 if ((__kmp_user_lock_kind == lk_tas) &&
2699 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2700#if KMP_OS_LINUX && \
2701 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2702// "fast" path implemented to fix customer performance issue
2703#if USE_ITT_BUILD
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002704 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002705#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00002706 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2707 KMP_MB();
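  // (Descriptive note: clearing the poll word releases the TAS lock on this
  //  fast path; KMP_MB() then issues a full memory barrier.)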
Joachim Protze82e94a52017-11-01 10:08:30 +00002708
2709#if OMPT_SUPPORT && OMPT_OPTIONAL
2710 // This is the case if called from omp_init_lock_with_hint:
2711 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2712 if (!codeptr)
2713 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2714 if (ompt_enabled.ompt_callback_mutex_released) {
2715 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze40636132018-05-28 08:16:08 +00002716 ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002717 }
2718#endif
2719
Jonathan Peyton30419822017-05-12 18:01:32 +00002720 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002721#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002722 lck = (kmp_user_lock_p)user_lock;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002723#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002724 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002725#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002726 else if ((__kmp_user_lock_kind == lk_futex) &&
2727 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2728 lck = (kmp_user_lock_p)user_lock;
2729 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002730#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002731 else {
2732 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2733 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002734
2735#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002736 __kmp_itt_lock_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002737#endif /* USE_ITT_BUILD */
2738
Jonathan Peyton30419822017-05-12 18:01:32 +00002739 RELEASE_LOCK(lck, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002740
Joachim Protze82e94a52017-11-01 10:08:30 +00002741#if OMPT_SUPPORT && OMPT_OPTIONAL
2742 // This is the case if called from omp_init_lock_with_hint:
2743 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2744 if (!codeptr)
2745 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2746 if (ompt_enabled.ompt_callback_mutex_released) {
2747 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze40636132018-05-28 08:16:08 +00002748 ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002749 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002750#endif
2751
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002752#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002753}
2754
2755/* release the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002756void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002757#if KMP_USE_DYNAMIC_LOCK
2758
Jonathan Peyton30419822017-05-12 18:01:32 +00002759#if USE_ITT_BUILD
2760 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2761#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002762 int release_status =
2763 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
Gheorghe-Teodor Bercea15f54072018-08-27 19:54:26 +00002764 (void) release_status;
Joachim Protze82e94a52017-11-01 10:08:30 +00002765
2766#if OMPT_SUPPORT && OMPT_OPTIONAL
2767 // This is the case if called from omp_init_lock_with_hint:
2768 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2769 if (!codeptr)
2770 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2771 if (ompt_enabled.enabled) {
2772 if (release_status == KMP_LOCK_RELEASED) {
2773 if (ompt_enabled.ompt_callback_mutex_released) {
2774 // release_lock_last
2775 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze40636132018-05-28 08:16:08 +00002776 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002777 }
2778 } else if (ompt_enabled.ompt_callback_nest_lock) {
2779 // release_lock_prev
2780 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze40636132018-05-28 08:16:08 +00002781 ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002782 }
2783 }
2784#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002785
2786#else // KMP_USE_DYNAMIC_LOCK
2787
2788 kmp_user_lock_p lck;
2789
2790 /* Can't use serial interval since not block structured */
2791
2792 if ((__kmp_user_lock_kind == lk_tas) &&
2793 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2794 OMP_NEST_LOCK_T_SIZE)) {
2795#if KMP_OS_LINUX && \
2796 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2797 // "fast" path implemented to fix customer performance issue
2798 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2799#if USE_ITT_BUILD
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002800 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002801#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00002802
2803#if OMPT_SUPPORT && OMPT_OPTIONAL
2804 int release_status = KMP_LOCK_STILL_HELD;
2805#endif
2806
Jonathan Peyton30419822017-05-12 18:01:32 +00002807 if (--(tl->lk.depth_locked) == 0) {
2808 TCW_4(tl->lk.poll, 0);
Joachim Protze82e94a52017-11-01 10:08:30 +00002809#if OMPT_SUPPORT && OMPT_OPTIONAL
2810 release_status = KMP_LOCK_RELEASED;
2811#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002812 }
2813 KMP_MB();
Joachim Protze82e94a52017-11-01 10:08:30 +00002814
2815#if OMPT_SUPPORT && OMPT_OPTIONAL
2816 // This is the case if called from omp_init_lock_with_hint:
2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818 if (!codeptr)
2819 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820 if (ompt_enabled.enabled) {
2821 if (release_status == KMP_LOCK_RELEASED) {
2822 if (ompt_enabled.ompt_callback_mutex_released) {
2823 // release_lock_last
2824 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze40636132018-05-28 08:16:08 +00002825 ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002826 }
2827 } else if (ompt_enabled.ompt_callback_nest_lock) {
2828 // release_lock_previous
2829 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze40636132018-05-28 08:16:08 +00002825 ompt_scope_end, (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002831 }
2832 }
2833#endif
2834
Jonathan Peyton30419822017-05-12 18:01:32 +00002835 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002836#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002837 lck = (kmp_user_lock_p)user_lock;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002838#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002839 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002840#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002841 else if ((__kmp_user_lock_kind == lk_futex) &&
2842 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2843 OMP_NEST_LOCK_T_SIZE)) {
2844 lck = (kmp_user_lock_p)user_lock;
2845 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002846#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002847 else {
2848 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2849 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002850
2851#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002852 __kmp_itt_lock_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002853#endif /* USE_ITT_BUILD */
2854
Jonathan Peyton30419822017-05-12 18:01:32 +00002855 int release_status;
2856 release_status = RELEASE_NESTED_LOCK(lck, gtid);
Joachim Protze82e94a52017-11-01 10:08:30 +00002857#if OMPT_SUPPORT && OMPT_OPTIONAL
2858 // This is the case if called from omp_init_lock_with_hint:
2859 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2860 if (!codeptr)
2861 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2862 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002863 if (release_status == KMP_LOCK_RELEASED) {
Joachim Protze82e94a52017-11-01 10:08:30 +00002864 if (ompt_enabled.ompt_callback_mutex_released) {
2865 // release_lock_last
2866 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze40636132018-05-28 08:16:08 +00002867 ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002868 }
Joachim Protze82e94a52017-11-01 10:08:30 +00002869 } else if (ompt_enabled.ompt_callback_nest_lock) {
2870 // release_lock_previous
2871 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze40636132018-05-28 08:16:08 +00002867 ompt_scope_end, (omp_wait_id_t)lck, codeptr);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002873 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002874 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002875#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002876
2877#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002878}
2879
2880/* try to acquire the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002881int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2882 KMP_COUNT_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002883
2884#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00002885 int rc;
2886 int tag = KMP_EXTRACT_D_TAG(user_lock);
2887#if USE_ITT_BUILD
2888 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2889#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002890#if OMPT_SUPPORT && OMPT_OPTIONAL
2891 // This is the case if called from omp_init_lock_with_hint:
2892 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2893 if (!codeptr)
2894 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2895 if (ompt_enabled.ompt_callback_mutex_acquire) {
2896 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2897 ompt_mutex_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00002898 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002899 codeptr);
2900 }
2901#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002902#if KMP_USE_INLINED_TAS
2903 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2904 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2905 } else
2906#elif KMP_USE_INLINED_FUTEX
2907 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2908 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2909 } else
2910#endif
2911 {
2912 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2913 }
2914 if (rc) {
2915#if USE_ITT_BUILD
2916 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2917#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002918#if OMPT_SUPPORT && OMPT_OPTIONAL
2919 if (ompt_enabled.ompt_callback_mutex_acquired) {
2920 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00002921 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002922 }
2923#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002924 return FTN_TRUE;
2925 } else {
2926#if USE_ITT_BUILD
2927 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2928#endif
2929 return FTN_FALSE;
2930 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002931
2932#else // KMP_USE_DYNAMIC_LOCK
2933
Jonathan Peyton30419822017-05-12 18:01:32 +00002934 kmp_user_lock_p lck;
2935 int rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002936
Jonathan Peyton30419822017-05-12 18:01:32 +00002937 if ((__kmp_user_lock_kind == lk_tas) &&
2938 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2939 lck = (kmp_user_lock_p)user_lock;
2940 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002941#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002942 else if ((__kmp_user_lock_kind == lk_futex) &&
2943 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2944 lck = (kmp_user_lock_p)user_lock;
2945 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002946#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002947 else {
2948 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
2949 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002950
2951#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002952 __kmp_itt_lock_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002953#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00002954#if OMPT_SUPPORT && OMPT_OPTIONAL
2955 // This is the case if called from omp_init_lock_with_hint:
2956 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2957 if (!codeptr)
2958 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2959 if (ompt_enabled.ompt_callback_mutex_acquire) {
2960 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2961 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze40636132018-05-28 08:16:08 +00002962 (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002963 }
2964#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002965
Jonathan Peyton30419822017-05-12 18:01:32 +00002966 rc = TEST_LOCK(lck, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002967#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002968 if (rc) {
2969 __kmp_itt_lock_acquired(lck);
2970 } else {
2971 __kmp_itt_lock_cancelled(lck);
2972 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002973#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00002974#if OMPT_SUPPORT && OMPT_OPTIONAL
2975 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
2976 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00002977 ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002978 }
2979#endif
2980
Jonathan Peyton30419822017-05-12 18:01:32 +00002981 return (rc ? FTN_TRUE : FTN_FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002982
Jonathan Peyton30419822017-05-12 18:01:32 +00002983/* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002984
2985#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002986}
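// Illustrative use from user code (assumed mapping onto omp_test_lock): the
// call never blocks; it returns FTN_TRUE when the lock was acquired and
// FTN_FALSE otherwise, so a typical pattern is
//   if (omp_test_lock(&l)) { /* ... critical work ... */ omp_unset_lock(&l); }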
2987
2988/* try to acquire the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002989int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002990#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00002991 int rc;
2992#if USE_ITT_BUILD
2993 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2994#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002995#if OMPT_SUPPORT && OMPT_OPTIONAL
2996 // This is the case if called from omp_init_lock_with_hint:
2997 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2998 if (!codeptr)
2999 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3000 if (ompt_enabled.ompt_callback_mutex_acquire) {
3001 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3002 ompt_mutex_nest_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00003003 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00003004 codeptr);
3005 }
3006#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003007 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3008#if USE_ITT_BUILD
3009 if (rc) {
3010 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3011 } else {
3012 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3013 }
3014#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00003015#if OMPT_SUPPORT && OMPT_OPTIONAL
3016 if (ompt_enabled.enabled && rc) {
3017 if (rc == 1) {
3018 if (ompt_enabled.ompt_callback_mutex_acquired) {
3019 // lock_first
3020 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00003021 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003022 }
3023 } else {
3024 if (ompt_enabled.ompt_callback_nest_lock) {
3025 // lock_next
3026 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze40636132018-05-28 08:16:08 +00003027 ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003028 }
3029 }
3030 }
3031#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003032 return rc;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003033
3034#else // KMP_USE_DYNAMIC_LOCK
3035
Jonathan Peyton30419822017-05-12 18:01:32 +00003036 kmp_user_lock_p lck;
3037 int rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003038
Jonathan Peyton30419822017-05-12 18:01:32 +00003039 if ((__kmp_user_lock_kind == lk_tas) &&
3040 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3041 OMP_NEST_LOCK_T_SIZE)) {
3042 lck = (kmp_user_lock_p)user_lock;
3043 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00003044#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00003045 else if ((__kmp_user_lock_kind == lk_futex) &&
3046 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3047 OMP_NEST_LOCK_T_SIZE)) {
3048 lck = (kmp_user_lock_p)user_lock;
3049 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003050#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003051 else {
3052 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3053 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003054
3055#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003056 __kmp_itt_lock_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003057#endif /* USE_ITT_BUILD */
3058
Joachim Protze82e94a52017-11-01 10:08:30 +00003059#if OMPT_SUPPORT && OMPT_OPTIONAL
3060 // This is the case if called from omp_init_lock_with_hint:
3061 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3062 if (!codeptr)
3063 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3064 if (ompt_enabled.enabled &&
3065 ompt_enabled.ompt_callback_mutex_acquire) {
3066 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3067 ompt_mutex_nest_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00003068 __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003069 }
3070#endif
3071
Jonathan Peyton30419822017-05-12 18:01:32 +00003072 rc = TEST_NESTED_LOCK(lck, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003073#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003074 if (rc) {
3075 __kmp_itt_lock_acquired(lck);
3076 } else {
3077 __kmp_itt_lock_cancelled(lck);
3078 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003079#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00003080#if OMPT_SUPPORT && OMPT_OPTIONAL
3081 if (ompt_enabled.enabled && rc) {
3082 if (rc == 1) {
3083 if (ompt_enabled.ompt_callback_mutex_acquired) {
3084 // lock_first
3085 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00003086 ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003087 }
3088 } else {
3089 if (ompt_enabled.ompt_callback_nest_lock) {
3090 // lock_next
3091 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze40636132018-05-28 08:16:08 +00003092 ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003093 }
3094 }
3095 }
3096#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003097 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003098
Jonathan Peyton30419822017-05-12 18:01:32 +00003099/* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003100
3101#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00003102}
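// Descriptive note (hedged, from the OMPT branches above plus the OpenMP
// semantics of omp_test_nest_lock): on success rc is the new nesting count,
// so rc == 1 marks the first acquisition (mutex_acquired) and rc > 1 a
// re-acquisition by the owner (nest-lock scope_begin); rc == 0 means the
// lock was not acquired.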
3103
Jonathan Peyton30419822017-05-12 18:01:32 +00003104// Interface to the fast, scalable reduction-method routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00003105
Jonathan Peyton30419822017-05-12 18:01:32 +00003106// Keep the selected method in a thread-local structure for cross-function
3107// use: it will be read back in the __kmpc_end_reduce* functions.
3108// An alternative would be to re-determine the method once more in the
3109// __kmpc_end_reduce* functions (a new prototype would be required then).
Jim Cownie5e8470a2013-09-27 10:38:44 +00003110// AT: which solution is better?
Jonathan Peyton30419822017-05-12 18:01:32 +00003111#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3112 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
Jim Cownie5e8470a2013-09-27 10:38:44 +00003113
Jonathan Peyton30419822017-05-12 18:01:32 +00003114#define __KMP_GET_REDUCTION_METHOD(gtid) \
3115 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
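// Illustrative flow (a sketch; the matching read happens in the
// __kmpc_end_reduce* routines, outside this excerpt):
//   packed_reduction_method = __kmp_determine_reduction_method(...);
//   __KMP_SET_REDUCTION_METHOD(gtid, packed_reduction_method);
//   ...
//   /* later, in __kmpc_end_reduce*() */
//   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(gtid);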
Jim Cownie5e8470a2013-09-27 10:38:44 +00003116
Jonathan Peyton30419822017-05-12 18:01:32 +00003117// description of the packed_reduction_method variable: look at the macros in
3118// kmp.h
Jim Cownie5e8470a2013-09-27 10:38:44 +00003119
3120// used in a critical section reduce block
3121static __forceinline void
Jonathan Peyton30419822017-05-12 18:01:32 +00003122__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3123 kmp_critical_name *crit) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003124
Jonathan Peyton30419822017-05-12 18:01:32 +00003125 // this lock was visible to customers and to the threading profile tool as a
3126 // serial-overhead span (although it is used for an internal purpose only)
3127 // why was it visible in the previous implementation?
3128 // should we keep it visible in the new reduce block?
3129 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003130
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003131#if KMP_USE_DYNAMIC_LOCK
3132
Jonathan Peyton30419822017-05-12 18:01:32 +00003133 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3134 // Check if it is initialized.
3135 if (*lk == 0) {
3136 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3137 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3138 KMP_GET_D_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003139 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00003140 __kmp_init_indirect_csptr(crit, loc, global_tid,
3141 KMP_GET_I_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003142 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003143 }
3144 // Branch for accessing the actual lock object and set operation. This
3145 // branching is inevitable since this lock initialization does not follow the
3146 // normal dispatch path (lock table is not used).
3147 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3148 lck = (kmp_user_lock_p)lk;
3149 KMP_DEBUG_ASSERT(lck != NULL);
3150 if (__kmp_env_consistency_check) {
3151 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3152 }
3153 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3154 } else {
3155 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3156 lck = ilk->lock;
3157 KMP_DEBUG_ASSERT(lck != NULL);
3158 if (__kmp_env_consistency_check) {
3159 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3160 }
3161 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3162 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003163
3164#else // KMP_USE_DYNAMIC_LOCK
3165
Jonathan Peyton30419822017-05-12 18:01:32 +00003166 // We know that the fast reduction code is only emitted by Intel compilers
3167 // with 32 byte critical sections. If there isn't enough space, then we
3168 // have to use a pointer.
3169 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3170 lck = (kmp_user_lock_p)crit;
3171 } else {
3172 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3173 }
3174 KMP_DEBUG_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003175
Jonathan Peyton30419822017-05-12 18:01:32 +00003176 if (__kmp_env_consistency_check)
3177 __kmp_push_sync(global_tid, ct_critical, loc, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003178
Jonathan Peyton30419822017-05-12 18:01:32 +00003179 __kmp_acquire_user_lock_with_checks(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003180
3181#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00003182}
3183
3184// used in a critical section reduce block
3185static __forceinline void
Jonathan Peyton30419822017-05-12 18:01:32 +00003186__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3187 kmp_critical_name *crit) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003188
Jonathan Peyton30419822017-05-12 18:01:32 +00003189 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003190
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003191#if KMP_USE_DYNAMIC_LOCK
3192
Jonathan Peyton30419822017-05-12 18:01:32 +00003193 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3194 lck = (kmp_user_lock_p)crit;
3195 if (__kmp_env_consistency_check)
3196 __kmp_pop_sync(global_tid, ct_critical, loc);
3197 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3198 } else {
3199 kmp_indirect_lock_t *ilk =
3200 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3201 if (__kmp_env_consistency_check)
3202 __kmp_pop_sync(global_tid, ct_critical, loc);
3203 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3204 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003205
3206#else // KMP_USE_DYNAMIC_LOCK
3207
Jonathan Peyton30419822017-05-12 18:01:32 +00003208 // We know that the fast reduction code is only emitted by Intel compilers
3209 // with 32 byte critical sections. If there isn't enough space, then we have
3210 // to use a pointer.
3211 if (__kmp_base_user_lock_size > 32) {
3212 lck = *((kmp_user_lock_p *)crit);
3213 KMP_ASSERT(lck != NULL);
3214 } else {
3215 lck = (kmp_user_lock_p)crit;
3216 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003217
Jonathan Peyton30419822017-05-12 18:01:32 +00003218 if (__kmp_env_consistency_check)
3219 __kmp_pop_sync(global_tid, ct_critical, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220
Jonathan Peyton30419822017-05-12 18:01:32 +00003221 __kmp_release_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003222
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003223#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00003224} // __kmp_end_critical_section_reduce_block
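// Illustrative pairing (a sketch; the end-call is issued from the
// __kmpc_end_reduce* path, outside this excerpt):
//   __kmp_enter_critical_section_reduce_block(loc, global_tid, crit);
//   /* combine this thread's partial results into the shared reduce data */
//   __kmp_end_critical_section_reduce_block(loc, global_tid, crit);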
3225
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003226#if OMP_40_ENABLED
3227static __forceinline int
3228__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3229 int *task_state) {
3230 kmp_team_t *team;
3231
3232 // Check whether we are inside a teams construct.
3233 if (th->th.th_teams_microtask) {
3234 *team_p = team = th->th.th_team;
3235 if (team->t.t_level == th->th.th_teams_level) {
3236 // This is reduction at teams construct.
3237 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3238 // Let's swap teams temporarily for the reduction.
3239 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3240 th->th.th_team = team->t.t_parent;
3241 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3242 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3243 *task_state = th->th.th_task_state;
3244 th->th.th_task_state = 0;
3245
3246 return 1;
3247 }
3248 }
3249 return 0;
3250}
3251
3252static __forceinline void
3253__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3254 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3255 th->th.th_info.ds.ds_tid = 0;
3256 th->th.th_team = team;
3257 th->th.th_team_nproc = team->t.t_nproc;
3258 th->th.th_task_team = team->t.t_task_team[task_state];
3259 th->th.th_task_state = task_state;
3260}
3261#endif
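// Illustrative pairing (a sketch; the restore happens once the reduction has
// completed, partly outside this excerpt):
//   teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
//   /* ... perform the reduction against the parent team ... */
//   if (teams_swapped)
//     __kmp_restore_swapped_teams(th, team, task_state);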
3262
Jim Cownie5e8470a2013-09-27 10:38:44 +00003263/* 2.a.i. Reduce Block without a terminating barrier */
3264/*!
3265@ingroup SYNCHRONIZATION
3266@param loc source location information
3267@param global_tid global thread number
3268@param num_vars number of items (variables) to be reduced
3269@param reduce_size size of data in bytes to be reduced
3270@param reduce_data pointer to data to be reduced
Jonathan Peyton30419822017-05-12 18:01:32 +00003271@param reduce_func callback function providing reduction operation on two
3272operands and returning result of reduction in lhs_data
Jim Cownie5e8470a2013-09-27 10:38:44 +00003273@param lck pointer to the unique lock data structure
Jonathan Peyton30419822017-05-12 18:01:32 +00003274@result 1 for the master thread, 0 for all other team threads, 2 for all team
3275threads if atomic reduction needed
Jim Cownie5e8470a2013-09-27 10:38:44 +00003276
3277The nowait version is used for a reduce clause with the nowait argument.
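
A sketch of how generated code typically consumes the result (illustrative
only; the combination step itself is compiler-generated):
@code
ret = __kmpc_reduce_nowait(loc, gtid, n, size, data, func, &crit);
if (ret == 1) {
  // this thread combines the partial results serially, then ends the block
  __kmpc_end_reduce_nowait(loc, gtid, &crit);
} else if (ret == 2) {
  // each thread contributes via atomic operations; no end-call is generated
}
@endcode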
3278*/
3279kmp_int32
Jonathan Peyton30419822017-05-12 18:01:32 +00003280__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3281 size_t reduce_size, void *reduce_data,
3282 void (*reduce_func)(void *lhs_data, void *rhs_data),
3283 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003284
Jonathan Peyton30419822017-05-12 18:01:32 +00003285 KMP_COUNT_BLOCK(REDUCE_nowait);
3286 int retval = 0;
3287 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003288#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003289 kmp_info_t *th;
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003290 kmp_team_t *team;
Jonathan Peyton30419822017-05-12 18:01:32 +00003291 int teams_swapped = 0, task_state;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003292#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003293 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003294
Jonathan Peyton30419822017-05-12 18:01:32 +00003295 // why do we need this initialization here at all?
3296 // The reduction clause cannot be used as a stand-alone directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003297
Jonathan Peyton30419822017-05-12 18:01:32 +00003298 // do not call __kmp_serial_initialize(); it will be called by
3299 // __kmp_parallel_initialize() if needed
3300 // possible false-positive race detection by the threadchecker ???
3301 if (!TCR_4(__kmp_init_parallel))
3302 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003303
Jonathan Peyton30419822017-05-12 18:01:32 +00003304// check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003305#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00003306 if (__kmp_env_consistency_check)
3307 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003308#else
Jonathan Peyton30419822017-05-12 18:01:32 +00003309 if (__kmp_env_consistency_check)
3310 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003311#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003312
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003313#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003314 th = __kmp_thread_from_gtid(global_tid);
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003315 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003316#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00003317
Jonathan Peyton30419822017-05-12 18:01:32 +00003318 // the packed_reduction_method value will be reused by the __kmp_end_reduce*
3319 // functions, so the value should be kept in a variable
3320 // the variable should be either a construct-specific or thread-specific
3321 // property, not a team specific property
3322 // (a thread can reach the next reduce block on the next construct, reduce
3323 // method may differ on the next construct)
3324 // an ident_t "loc" parameter could be used as a construct-specific property
3325 // (what if loc == 0?)
3326 // (if both construct-specific and team-specific variables were shared,
3327 // then unnecessary extra syncs would be needed)
3328 // a thread-specific variable is better regarding two issues above (next
3329 // construct and extra syncs)
3330 // a thread-specific "th_local.reduction_method" variable is used currently
3331 // each thread executes 'determine' and 'set' lines (no need to execute by one
3332 // thread, to avoid unnecessary extra syncs)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003333
Jonathan Peyton30419822017-05-12 18:01:32 +00003334 packed_reduction_method = __kmp_determine_reduction_method(
3335 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3336 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003337
Jonathan Peyton30419822017-05-12 18:01:32 +00003338 if (packed_reduction_method == critical_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003339
Jonathan Peyton30419822017-05-12 18:01:32 +00003340 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3341 retval = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003342
Jonathan Peyton30419822017-05-12 18:01:32 +00003343 } else if (packed_reduction_method == empty_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003344
Jonathan Peyton30419822017-05-12 18:01:32 +00003345 // usage: if team size == 1, no synchronization is required (Intel
3346 // platforms only)
3347 retval = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003348
Jonathan Peyton30419822017-05-12 18:01:32 +00003349 } else if (packed_reduction_method == atomic_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003350
Jonathan Peyton30419822017-05-12 18:01:32 +00003351 retval = 2;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003352
Jonathan Peyton30419822017-05-12 18:01:32 +00003353 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3354 // won't be called by the code gen)
3355 // (this is not ideal: the checking block is closed by this 'pop'
3356 // before the atomic operation has actually executed;
3357 // the atomic runs slightly later,
3358 // literally on the next instruction)
3359 if (__kmp_env_consistency_check)
3360 __kmp_pop_sync(global_tid, ct_reduce, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003361
Jonathan Peyton30419822017-05-12 18:01:32 +00003362 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3363 tree_reduce_block)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003364
Jonathan Peyton30419822017-05-12 18:01:32 +00003365// AT: performance issue: a real barrier here
3366// AT: (if the master is slow, other threads are blocked here waiting for it
3367// to arrive and release them)
3368// AT: (this is not what a customer would expect when specifying NOWAIT)
3369// AT: (specifying NOWAIT brings no performance improvement here, which will
3370// be confusing to a customer)
3371// AT: another implementation of *barrier_gather*nowait() (or another design)
3372// might be faster and more in line with the intent of NOWAIT
3373// AT: TODO: run the EPCC benchmark and compare times
Jim Cownie5e8470a2013-09-27 10:38:44 +00003374
Jonathan Peyton30419822017-05-12 18:01:32 +00003375// this barrier should be invisible to a customer and to the threading profile
3376// tool (it's neither a terminating barrier nor customer's code, it's
3377// used for an internal purpose)
Joachim Protze82e94a52017-11-01 10:08:30 +00003378#if OMPT_SUPPORT
3379 // JP: can this barrier potentially lead to task scheduling?
3380 // JP: as long as there is a barrier in the implementation, OMPT should and
3381 // will provide the barrier events, so we set up the necessary frame/return
3382 // addresses.
Joachim Protzec5836064b2018-05-28 08:14:58 +00003383 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003384 if (ompt_enabled.enabled) {
3385 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003386 if (ompt_frame->enter_frame == NULL)
3387 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003388 OMPT_STORE_RETURN_ADDRESS(global_tid);
3389 }
3390#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003391#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003392 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003393#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003394 retval =
3395 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3396 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3397 retval = (retval != 0) ? (0) : (1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003398#if OMPT_SUPPORT && OMPT_OPTIONAL
3399 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003400 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003401 }
3402#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003403
Jonathan Peyton30419822017-05-12 18:01:32 +00003404 // all other workers except master should do this pop here
3405 // (none of the other workers will get to __kmpc_end_reduce_nowait())
3406 if (__kmp_env_consistency_check) {
3407 if (retval == 0) {
3408 __kmp_pop_sync(global_tid, ct_reduce, loc);
3409 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003410 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003411
3412 } else {
3413
3414 // should never reach this block
3415 KMP_ASSERT(0); // "unexpected method"
3416 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003417#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003418 if (teams_swapped) {
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003419 __kmp_restore_swapped_teams(th, team, task_state);
Jonathan Peyton30419822017-05-12 18:01:32 +00003420 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003421#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003422 KA_TRACE(
3423 10,
3424 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3425 global_tid, packed_reduction_method, retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003426
Jonathan Peyton30419822017-05-12 18:01:32 +00003427 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003428}
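// A minimal usage sketch (hypothetical, for illustration only; real code
// generation is compiler-specific and the combiner/locals are invented
// names) of how generated code typically consumes the 0/1/2 result above.
// On the atomic path (2) no __kmpc_end_reduce_nowait() call is generated,
// matching the early 'pop' performed for atomic_reduce_block; the atomic
// entry point used here (__kmpc_atomic_float8_add) is assumed from the
// runtime's kmp_atomic interface. Guarded out of compilation.
#if 0
// Hypothetical combiner matching the reduce_func callback signature.
static void sketch_add_doubles(void *lhs_data, void *rhs_data) {
  *(double *)lhs_data += *(double *)rhs_data;
}

static void sketch_reduce_nowait_usage(ident_t *loc, kmp_int32 gtid,
                                       double *shared_sum, double local_sum,
                                       kmp_critical_name *lck) {
  kmp_int32 res =
      __kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(double),
                           &local_sum, sketch_add_doubles, lck);
  if (res == 1) { // this thread combines into the shared copy, then ends
    *shared_sum += local_sum;
    __kmpc_end_reduce_nowait(loc, gtid, lck);
  } else if (res == 2) { // atomic path; no end call is generated
    __kmpc_atomic_float8_add(loc, gtid, shared_sum, local_sum);
  } // res == 0: a tree-reduction worker; its data was already consumed
}
#endif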
3429
3430/*!
3431@ingroup SYNCHRONIZATION
3432@param loc source location information
3433@param global_tid global thread id.
3434@param lck pointer to the unique lock data structure
3435
3436Finish the execution of a reduce nowait.
3437*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003438void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3439 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003440
Jonathan Peyton30419822017-05-12 18:01:32 +00003441 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003442
Jonathan Peyton30419822017-05-12 18:01:32 +00003443 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003444
Jonathan Peyton30419822017-05-12 18:01:32 +00003445 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003446
Jonathan Peyton30419822017-05-12 18:01:32 +00003447 if (packed_reduction_method == critical_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003448
Jonathan Peyton30419822017-05-12 18:01:32 +00003449 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003450
Jonathan Peyton30419822017-05-12 18:01:32 +00003451 } else if (packed_reduction_method == empty_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003452
Jonathan Peyton30419822017-05-12 18:01:32 +00003453 // usage: if team size == 1, no synchronization is required ( on Intel
3454 // platforms only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00003455
Jonathan Peyton30419822017-05-12 18:01:32 +00003456 } else if (packed_reduction_method == atomic_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003457
Jonathan Peyton30419822017-05-12 18:01:32 +00003458 // neither master nor other workers should get here
3459 // (code gen does not generate this call in case 2: atomic reduce block)
3460 // actually it would be better to remove this else-if entirely;
3461 // after removal this value would be caught by the 'else' and assert
Jim Cownie5e8470a2013-09-27 10:38:44 +00003462
Jonathan Peyton30419822017-05-12 18:01:32 +00003463 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3464 tree_reduce_block)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003465
Jonathan Peyton30419822017-05-12 18:01:32 +00003466 // only master gets here
Jim Cownie5e8470a2013-09-27 10:38:44 +00003467
Jonathan Peyton30419822017-05-12 18:01:32 +00003468 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003469
Jonathan Peyton30419822017-05-12 18:01:32 +00003470 // should never reach this block
3471 KMP_ASSERT(0); // "unexpected method"
3472 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003473
Jonathan Peyton30419822017-05-12 18:01:32 +00003474 if (__kmp_env_consistency_check)
3475 __kmp_pop_sync(global_tid, ct_reduce, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003476
Jonathan Peyton30419822017-05-12 18:01:32 +00003477 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3478 global_tid, packed_reduction_method));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003479
Jonathan Peyton30419822017-05-12 18:01:32 +00003480 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003481}
3482
3483/* 2.a.ii. Reduce Block with a terminating barrier */
3484
3485/*!
3486@ingroup SYNCHRONIZATION
3487@param loc source location information
3488@param global_tid global thread number
3489@param num_vars number of items (variables) to be reduced
3490@param reduce_size size of data in bytes to be reduced
3491@param reduce_data pointer to data to be reduced
Jonathan Peyton30419822017-05-12 18:01:32 +00003492@param reduce_func callback function providing reduction operation on two
3493operands and returning result of reduction in lhs_data
Jim Cownie5e8470a2013-09-27 10:38:44 +00003494@param lck pointer to the unique lock data structure
Jonathan Peyton30419822017-05-12 18:01:32 +00003495@result 1 for the master thread, 0 for all other team threads, 2 for all team
3496threads if atomic reduction needed
Jim Cownie5e8470a2013-09-27 10:38:44 +00003497
3498A blocking reduce that includes an implicit barrier.
3499*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003500kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3501 size_t reduce_size, void *reduce_data,
3502 void (*reduce_func)(void *lhs_data, void *rhs_data),
3503 kmp_critical_name *lck) {
3504 KMP_COUNT_BLOCK(REDUCE_wait);
3505 int retval = 0;
3506 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003507#if OMP_40_ENABLED
3508 kmp_info_t *th;
3509 kmp_team_t *team;
3510 int teams_swapped = 0, task_state;
3511#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003512
Jonathan Peyton30419822017-05-12 18:01:32 +00003513 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003514
Jonathan Peyton30419822017-05-12 18:01:32 +00003515 // why do we need this initialization here at all?
3516 // A reduction clause cannot be a stand-alone directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003517
Jonathan Peyton30419822017-05-12 18:01:32 +00003518 // do not call __kmp_serial_initialize(); it will be called by
3519 // __kmp_parallel_initialize() if needed
3520 // (note: the thread checker may report a false-positive race here)
3521 if (!TCR_4(__kmp_init_parallel))
3522 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003523
Jonathan Peyton30419822017-05-12 18:01:32 +00003524// check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003525#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00003526 if (__kmp_env_consistency_check)
3527 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003528#else
Jonathan Peyton30419822017-05-12 18:01:32 +00003529 if (__kmp_env_consistency_check)
3530 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003531#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003532
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003533#if OMP_40_ENABLED
3534 th = __kmp_thread_from_gtid(global_tid);
3535 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3536#endif // OMP_40_ENABLED
3537
Jonathan Peyton30419822017-05-12 18:01:32 +00003538 packed_reduction_method = __kmp_determine_reduction_method(
3539 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3540 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003541
Jonathan Peyton30419822017-05-12 18:01:32 +00003542 if (packed_reduction_method == critical_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003543
Jonathan Peyton30419822017-05-12 18:01:32 +00003544 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3545 retval = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003546
Jonathan Peyton30419822017-05-12 18:01:32 +00003547 } else if (packed_reduction_method == empty_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003548
Jonathan Peyton30419822017-05-12 18:01:32 +00003549 // usage: if team size == 1, no synchronization is required ( Intel
3550 // platforms only )
3551 retval = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003552
Jonathan Peyton30419822017-05-12 18:01:32 +00003553 } else if (packed_reduction_method == atomic_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003554
Jonathan Peyton30419822017-05-12 18:01:32 +00003555 retval = 2;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003556
Jonathan Peyton30419822017-05-12 18:01:32 +00003557 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3558 tree_reduce_block)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003559
Jonathan Peyton30419822017-05-12 18:01:32 +00003560// case tree_reduce_block:
3561// this barrier should be visible to a customer and to the threading profile
3562// tool (it's a terminating barrier on constructs if NOWAIT not specified)
Joachim Protze82e94a52017-11-01 10:08:30 +00003563#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00003564 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003565 if (ompt_enabled.enabled) {
3566 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003567 if (ompt_frame->enter_frame == NULL)
3568 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003569 OMPT_STORE_RETURN_ADDRESS(global_tid);
3570 }
3571#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003572#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003573 __kmp_threads[global_tid]->th.th_ident =
3574 loc; // needed for correct notification of frames
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003575#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003576 retval =
3577 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3578 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3579 retval = (retval != 0) ? (0) : (1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003580#if OMPT_SUPPORT && OMPT_OPTIONAL
3581 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003582 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003583 }
3584#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003585
Jonathan Peyton30419822017-05-12 18:01:32 +00003586 // all other workers except master should do this pop here
3587 // (none of the workers other than master will enter __kmpc_end_reduce())
3588 if (__kmp_env_consistency_check) {
3589 if (retval == 0) { // 0: all other workers; 1: master
3590 __kmp_pop_sync(global_tid, ct_reduce, loc);
3591 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003592 }
3593
Jonathan Peyton30419822017-05-12 18:01:32 +00003594 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003595
Jonathan Peyton30419822017-05-12 18:01:32 +00003596 // should never reach this block
3597 KMP_ASSERT(0); // "unexpected method"
3598 }
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003599#if OMP_40_ENABLED
3600 if (teams_swapped) {
3601 __kmp_restore_swapped_teams(th, team, task_state);
3602 }
3603#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003604
3605 KA_TRACE(10,
3606 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3607 global_tid, packed_reduction_method, retval));
3608
3609 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003610}
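// The blocking counterpart of the sketch after __kmpc_reduce_nowait() above
// (hypothetical, illustration only; sketch_add_doubles as defined there):
// here __kmpc_end_reduce() is called on both the 1 and 2 paths, since the
// construct still owes its terminating barrier; threads that get 0 are
// released by the master inside __kmpc_end_reduce(). Guarded out of
// compilation.
#if 0
static void sketch_reduce_usage(ident_t *loc, kmp_int32 gtid,
                                double *shared_sum, double local_sum,
                                kmp_critical_name *lck) {
  kmp_int32 res = __kmpc_reduce(loc, gtid, /*num_vars=*/1, sizeof(double),
                                &local_sum, sketch_add_doubles, lck);
  if (res == 1) { // critical path, or the tree-reduction master
    *shared_sum += local_sum;
    __kmpc_end_reduce(loc, gtid, lck);
  } else if (res == 2) { // atomic path: update, then join the barrier
    __kmpc_atomic_float8_add(loc, gtid, shared_sum, local_sum);
    __kmpc_end_reduce(loc, gtid, lck);
  } // res == 0: tree-reduction worker, already synchronized
}
#endif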
3611
3612/*!
3613@ingroup SYNCHRONIZATION
3614@param loc source location information
3615@param global_tid global thread id.
3616@param lck pointer to the unique lock data structure
3617
3618Finish the execution of a blocking reduce.
Jonathan Peyton30419822017-05-12 18:01:32 +00003619The <tt>lck</tt> pointer must be the same as that used in the corresponding
3620start function.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003621*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003622void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3623 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003624
Jonathan Peyton30419822017-05-12 18:01:32 +00003625 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003626#if OMP_40_ENABLED
3627 kmp_info_t *th;
3628 kmp_team_t *team;
3629 int teams_swapped = 0, task_state;
3630#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003631
Jonathan Peyton30419822017-05-12 18:01:32 +00003632 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003633
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003634#if OMP_40_ENABLED
3635 th = __kmp_thread_from_gtid(global_tid);
3636 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3637#endif // OMP_40_ENABLED
3638
Jonathan Peyton30419822017-05-12 18:01:32 +00003639 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003640
Jonathan Peyton30419822017-05-12 18:01:32 +00003641 // this barrier should be visible to a customer and to the threading profile
3642 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003643
Jonathan Peyton30419822017-05-12 18:01:32 +00003644 if (packed_reduction_method == critical_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003645
Jonathan Peyton30419822017-05-12 18:01:32 +00003646 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003647
Jonathan Peyton30419822017-05-12 18:01:32 +00003648// TODO: implicit barrier: should be exposed
Joachim Protze82e94a52017-11-01 10:08:30 +00003649#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00003650 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003651 if (ompt_enabled.enabled) {
3652 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003653 if (ompt_frame->enter_frame == NULL)
3654 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003655 OMPT_STORE_RETURN_ADDRESS(global_tid);
3656 }
3657#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003658#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003659 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003660#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003661 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00003662#if OMPT_SUPPORT && OMPT_OPTIONAL
3663 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003664 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003665 }
3666#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003667
Jonathan Peyton30419822017-05-12 18:01:32 +00003668 } else if (packed_reduction_method == empty_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003669
Jonathan Peyton30419822017-05-12 18:01:32 +00003670// usage: if team size==1, no synchronization is required (Intel platforms only)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003671
Jonathan Peyton30419822017-05-12 18:01:32 +00003672// TODO: implicit barrier: should be exposed
Joachim Protze82e94a52017-11-01 10:08:30 +00003673#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00003674 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003675 if (ompt_enabled.enabled) {
3676 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003677 if (ompt_frame->enter_frame == NULL)
3678 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003679 OMPT_STORE_RETURN_ADDRESS(global_tid);
3680 }
3681#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003682#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003683 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003684#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003685 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00003686#if OMPT_SUPPORT && OMPT_OPTIONAL
3687 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003688 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003689 }
3690#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003691
Jonathan Peyton30419822017-05-12 18:01:32 +00003692 } else if (packed_reduction_method == atomic_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003693
Joachim Protze82e94a52017-11-01 10:08:30 +00003694#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00003695 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003696 if (ompt_enabled.enabled) {
3697 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003698 if (ompt_frame->enter_frame == NULL)
3699 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003700 OMPT_STORE_RETURN_ADDRESS(global_tid);
3701 }
3702#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003703// TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003704#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003705 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003706#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003707 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00003708#if OMPT_SUPPORT && OMPT_OPTIONAL
3709 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003710 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003711 }
3712#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003713
Jonathan Peyton30419822017-05-12 18:01:32 +00003714 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3715 tree_reduce_block)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716
Jonathan Peyton30419822017-05-12 18:01:32 +00003717 // only master executes here (master releases all other workers)
3718 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3719 global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003720
Jonathan Peyton30419822017-05-12 18:01:32 +00003721 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003722
Jonathan Peyton30419822017-05-12 18:01:32 +00003723 // should never reach this block
3724 KMP_ASSERT(0); // "unexpected method"
3725 }
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003726#if OMP_40_ENABLED
3727 if (teams_swapped) {
3728 __kmp_restore_swapped_teams(th, team, task_state);
3729 }
3730#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003731
Jonathan Peyton30419822017-05-12 18:01:32 +00003732 if (__kmp_env_consistency_check)
3733 __kmp_pop_sync(global_tid, ct_reduce, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003734
Jonathan Peyton30419822017-05-12 18:01:32 +00003735 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3736 global_tid, packed_reduction_method));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003737
Jonathan Peyton30419822017-05-12 18:01:32 +00003738 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003739}
3740
3741#undef __KMP_GET_REDUCTION_METHOD
3742#undef __KMP_SET_REDUCTION_METHOD
3743
Jonathan Peyton30419822017-05-12 18:01:32 +00003744/* end of interface to fast scalable reduce routines */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003745
Jonathan Peyton30419822017-05-12 18:01:32 +00003746kmp_uint64 __kmpc_get_taskid() {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003747
Jonathan Peyton30419822017-05-12 18:01:32 +00003748 kmp_int32 gtid;
3749 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003750
Jonathan Peyton30419822017-05-12 18:01:32 +00003751 gtid = __kmp_get_gtid();
3752 if (gtid < 0) {
3753 return 0;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003754 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003755 thread = __kmp_thread_from_gtid(gtid);
3756 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003757
3758} // __kmpc_get_taskid
3759
Jonathan Peyton30419822017-05-12 18:01:32 +00003760kmp_uint64 __kmpc_get_parent_taskid() {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003761
Jonathan Peyton30419822017-05-12 18:01:32 +00003762 kmp_int32 gtid;
3763 kmp_info_t *thread;
3764 kmp_taskdata_t *parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003765
Jonathan Peyton30419822017-05-12 18:01:32 +00003766 gtid = __kmp_get_gtid();
3767 if (gtid < 0) {
3768 return 0;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003769 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003770 thread = __kmp_thread_from_gtid(gtid);
3771 parent_task = thread->th.th_current_task->td_parent;
3772 return (parent_task == NULL ? 0 : parent_task->td_task_id);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003773
3774} // __kmpc_get_parent_taskid
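// A tiny illustrative sketch (hypothetical, assumes <cstdio>): these two
// entry points can back ad-hoc task tracing; both return 0 when the calling
// thread has no OpenMP context. Guarded out of compilation.
#if 0
#include <cstdio>
static void sketch_print_task_ids(void) {
  kmp_uint64 task = __kmpc_get_taskid();
  kmp_uint64 parent = __kmpc_get_parent_taskid();
  printf("task id %llu, parent id %llu\n", (unsigned long long)task,
         (unsigned long long)parent);
}
#endif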
3775
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003776#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00003777/*!
3778@ingroup WORK_SHARING
3779@param loc source location information.
3780@param gtid global thread number.
3781@param num_dims number of associated doacross loops.
3782@param dims info on loops bounds.
3783
3784Initialize doacross loop information.
3785 Expect the compiler to send us inclusive bounds,
3786 e.g. for (i = 2; i < 9; i += 2): lo = 2, up = 8, st = 2.
3787*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003788void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
Jonathan Peyton369d72d2018-07-30 17:48:33 +00003789 const struct kmp_dim *dims) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003790 int j, idx;
3791 kmp_int64 last, trace_count;
3792 kmp_info_t *th = __kmp_threads[gtid];
3793 kmp_team_t *team = th->th.th_team;
3794 kmp_uint32 *flags;
3795 kmp_disp_t *pr_buf = th->th.th_dispatch;
3796 dispatch_shared_info_t *sh_buf;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003797
Jonathan Peyton30419822017-05-12 18:01:32 +00003798 KA_TRACE(
3799 20,
3800 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3801 gtid, num_dims, !team->t.t_serialized));
3802 KMP_DEBUG_ASSERT(dims != NULL);
3803 KMP_DEBUG_ASSERT(num_dims > 0);
Jonathan Peyton71909c52016-03-02 22:42:06 +00003804
Jonathan Peyton30419822017-05-12 18:01:32 +00003805 if (team->t.t_serialized) {
3806 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3807 return; // no dependencies if team is serialized
3808 }
3809 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3810 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3811 // the next loop
3812 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003813
Jonathan Peyton30419822017-05-12 18:01:32 +00003814 // Save bounds info into allocated private buffer
3815 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3816 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3817 th, sizeof(kmp_int64) * (4 * num_dims + 1));
3818 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3819 pr_buf->th_doacross_info[0] =
3820 (kmp_int64)num_dims; // first element is number of dimensions
3821 // Also save the address of num_done so we can access it later without
3822 // knowing the buffer index
3823 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3824 pr_buf->th_doacross_info[2] = dims[0].lo;
3825 pr_buf->th_doacross_info[3] = dims[0].up;
3826 pr_buf->th_doacross_info[4] = dims[0].st;
3827 last = 5;
3828 for (j = 1; j < num_dims; ++j) {
3829 kmp_int64
3830 range_length; // To keep ranges of all dimensions but the first dims[0]
3831 if (dims[j].st == 1) { // most common case
3832 // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3833 range_length = dims[j].up - dims[j].lo + 1;
3834 } else {
3835 if (dims[j].st > 0) {
3836 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3837 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3838 } else { // negative increment
3839 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3840 range_length =
3841 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3842 }
Jonathan Peyton71909c52016-03-02 22:42:06 +00003843 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003844 pr_buf->th_doacross_info[last++] = range_length;
3845 pr_buf->th_doacross_info[last++] = dims[j].lo;
3846 pr_buf->th_doacross_info[last++] = dims[j].up;
3847 pr_buf->th_doacross_info[last++] = dims[j].st;
3848 }
Jonathan Peyton71909c52016-03-02 22:42:06 +00003849
Jonathan Peyton30419822017-05-12 18:01:32 +00003850 // Compute total trip count.
3851 // Start with range of dims[0] which we don't need to keep in the buffer.
3852 if (dims[0].st == 1) { // most common case
3853 trace_count = dims[0].up - dims[0].lo + 1;
3854 } else if (dims[0].st > 0) {
3855 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3856 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3857 } else { // negative increment
3858 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3859 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3860 }
3861 for (j = 1; j < num_dims; ++j) {
3862 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3863 }
3864 KMP_DEBUG_ASSERT(trace_count > 0);
Jonathan Peyton71909c52016-03-02 22:42:06 +00003865
Jonathan Peyton30419822017-05-12 18:01:32 +00003866 // Check if shared buffer is not occupied by other loop (idx -
3867 // __kmp_dispatch_num_buffers)
3868 if (idx != sh_buf->doacross_buf_idx) {
3869 // Shared buffer is occupied; wait for it to become free
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003870 __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3871 __kmp_eq_4, NULL);
Jonathan Peyton30419822017-05-12 18:01:32 +00003872 }
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003873#if KMP_32_BIT_ARCH
Jonathan Peyton30419822017-05-12 18:01:32 +00003874 // Check if we are the first thread. After the CAS the first thread gets 0,
3875 // others get 1 if initialization is in progress, or the allocated pointer.
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003876 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3877 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3878 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3879#else
Jonathan Peyton30419822017-05-12 18:01:32 +00003880 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003881 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3882#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003883 if (flags == NULL) {
3884 // we are the first thread; allocate the array of flags
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003885 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
Jonas Hahnfeld221e7bb2017-11-22 17:15:20 +00003886 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3887 KMP_MB();
3888 sh_buf->doacross_flags = flags;
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003889 } else if (flags == (kmp_uint32 *)1) {
3890#if KMP_32_BIT_ARCH
Jonathan Peyton30419822017-05-12 18:01:32 +00003891 // initialization is still in progress, need to wait
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003892 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3893#else
3894 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3895#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003896 KMP_YIELD(TRUE);
Jonas Hahnfeld221e7bb2017-11-22 17:15:20 +00003897 KMP_MB();
3898 } else {
3899 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00003900 }
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003901 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
Jonathan Peyton30419822017-05-12 18:01:32 +00003902 pr_buf->th_doacross_flags =
3903 sh_buf->doacross_flags; // save a private copy so we do not
3904 // touch the shared buffer on each iteration
3905 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
Jonathan Peyton71909c52016-03-02 22:42:06 +00003906}
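// The flag-array setup above is a compare-and-swap guarded lazy
// initialization: exactly one thread swings the pointer from NULL to the
// sentinel value 1, allocates, and then publishes the real pointer; late
// arrivals spin while they observe the sentinel. A distilled, self-contained
// sketch of the same pattern (hypothetical, illustration only; uses
// std::atomic and ignores the 32-bit-architecture special case above).
// Guarded out of compilation.
#if 0
#include <atomic>
#include <cstdint>

static std::atomic<uint32_t *> g_flags{nullptr};

static uint32_t *sketch_get_flags(size_t nwords) {
  uint32_t *p = nullptr;
  // First thread swings nullptr -> (uint32_t *)1 and performs the allocation.
  if (g_flags.compare_exchange_strong(p, (uint32_t *)1)) {
    p = new uint32_t[nwords](); // zero-initialized flag words
    g_flags.store(p, std::memory_order_release); // publish the real pointer
  } else {
    // Either initialization is in progress (sentinel) or already done.
    while ((p = g_flags.load(std::memory_order_acquire)) == (uint32_t *)1) {
      /* spin; a real runtime would yield, as KMP_YIELD(TRUE) does above */
    }
  }
  return p;
}
#endif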
3907
Jonathan Peyton369d72d2018-07-30 17:48:33 +00003908void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003909 kmp_int32 shft, num_dims, i;
3910 kmp_uint32 flag;
3911 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3912 kmp_info_t *th = __kmp_threads[gtid];
3913 kmp_team_t *team = th->th.th_team;
3914 kmp_disp_t *pr_buf;
3915 kmp_int64 lo, up, st;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003916
Jonathan Peyton30419822017-05-12 18:01:32 +00003917 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3918 if (team->t.t_serialized) {
3919 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3920 return; // no dependencies if team is serialized
3921 }
Jonathan Peyton71909c52016-03-02 22:42:06 +00003922
Jonathan Peyton30419822017-05-12 18:01:32 +00003923 // calculate sequential iteration number and check out-of-bounds condition
3924 pr_buf = th->th.th_dispatch;
3925 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3926 num_dims = pr_buf->th_doacross_info[0];
3927 lo = pr_buf->th_doacross_info[2];
3928 up = pr_buf->th_doacross_info[3];
3929 st = pr_buf->th_doacross_info[4];
3930 if (st == 1) { // most common case
3931 if (vec[0] < lo || vec[0] > up) {
3932 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3933 "bounds [%lld,%lld]\n",
3934 gtid, vec[0], lo, up));
3935 return;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003936 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003937 iter_number = vec[0] - lo;
3938 } else if (st > 0) {
3939 if (vec[0] < lo || vec[0] > up) {
3940 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3941 "bounds [%lld,%lld]\n",
3942 gtid, vec[0], lo, up));
3943 return;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003944 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003945 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3946 } else { // negative increment
3947 if (vec[0] > lo || vec[0] < up) {
3948 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3949 "bounds [%lld,%lld]\n",
3950 gtid, vec[0], lo, up));
3951 return;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003952 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003953 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3954 }
3955 for (i = 1; i < num_dims; ++i) {
3956 kmp_int64 iter, ln;
3957 kmp_int32 j = i * 4;
3958 ln = pr_buf->th_doacross_info[j + 1];
3959 lo = pr_buf->th_doacross_info[j + 2];
3960 up = pr_buf->th_doacross_info[j + 3];
3961 st = pr_buf->th_doacross_info[j + 4];
3962 if (st == 1) {
3963 if (vec[i] < lo || vec[i] > up) {
3964 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3965 "bounds [%lld,%lld]\n",
3966 gtid, vec[i], lo, up));
3967 return;
3968 }
3969 iter = vec[i] - lo;
3970 } else if (st > 0) {
3971 if (vec[i] < lo || vec[i] > up) {
3972 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3973 "bounds [%lld,%lld]\n",
3974 gtid, vec[i], lo, up));
3975 return;
3976 }
3977 iter = (kmp_uint64)(vec[i] - lo) / st;
3978 } else { // st < 0
3979 if (vec[i] > lo || vec[i] < up) {
3980 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3981 "bounds [%lld,%lld]\n",
3982 gtid, vec[i], lo, up));
3983 return;
3984 }
3985 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3986 }
3987 iter_number = iter + ln * iter_number;
3988 }
3989 shft = iter_number % 32; // use 32-bit granularity
3990 iter_number >>= 5; // divided by 32
3991 flag = 1 << shft;
3992 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
3993 KMP_YIELD(TRUE);
3994 }
Jonas Hahnfeld221e7bb2017-11-22 17:15:20 +00003995 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00003996 KA_TRACE(20,
3997 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
3998 gtid, (iter_number << 5) + shft));
Jonathan Peyton71909c52016-03-02 22:42:06 +00003999}
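// A distilled sketch (hypothetical, illustration only) of the indexing math
// used above: the loop nest is linearized into one iteration number via
// iter_number = iter_j + range_length_j * iter_number for each inner
// dimension, and each iteration owns a single bit of the shared flag array:
// word = iter_number / 32 (the >> 5 above), bit = iter_number % 32.
// Guarded out of compilation.
#if 0
#include <cstdint>
static inline bool sketch_iter_posted(const uint32_t *flags, uint64_t iter) {
  uint32_t shft = (uint32_t)(iter % 32); // bit position within a 32-bit word
  uint64_t word = iter >> 5; // word index: iteration number divided by 32
  return (flags[word] & (1u << shft)) != 0; // set once the iter is posted
}
#endif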
4000
Jonathan Peyton369d72d2018-07-30 17:48:33 +00004001void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
Jonathan Peyton30419822017-05-12 18:01:32 +00004002 kmp_int32 shft, num_dims, i;
4003 kmp_uint32 flag;
4004 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4005 kmp_info_t *th = __kmp_threads[gtid];
4006 kmp_team_t *team = th->th.th_team;
4007 kmp_disp_t *pr_buf;
4008 kmp_int64 lo, st;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004009
Jonathan Peyton30419822017-05-12 18:01:32 +00004010 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4011 if (team->t.t_serialized) {
4012 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4013 return; // no dependencies if team is serialized
4014 }
Jonathan Peyton71909c52016-03-02 22:42:06 +00004015
Jonathan Peyton30419822017-05-12 18:01:32 +00004016 // calculate sequential iteration number (same as in "wait" but no
4017 // out-of-bounds checks)
4018 pr_buf = th->th.th_dispatch;
4019 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4020 num_dims = pr_buf->th_doacross_info[0];
4021 lo = pr_buf->th_doacross_info[2];
4022 st = pr_buf->th_doacross_info[4];
4023 if (st == 1) { // most common case
4024 iter_number = vec[0] - lo;
4025 } else if (st > 0) {
4026 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4027 } else { // negative increment
4028 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4029 }
4030 for (i = 1; i < num_dims; ++i) {
4031 kmp_int64 iter, ln;
4032 kmp_int32 j = i * 4;
4033 ln = pr_buf->th_doacross_info[j + 1];
4034 lo = pr_buf->th_doacross_info[j + 2];
4035 st = pr_buf->th_doacross_info[j + 4];
4036 if (st == 1) {
4037 iter = vec[i] - lo;
4038 } else if (st > 0) {
4039 iter = (kmp_uint64)(vec[i] - lo) / st;
4040 } else { // st < 0
4041 iter = (kmp_uint64)(lo - vec[i]) / (-st);
Jonathan Peyton71909c52016-03-02 22:42:06 +00004042 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004043 iter_number = iter + ln * iter_number;
4044 }
4045 shft = iter_number % 32; // use 32-bit granularity
4046 iter_number >>= 5; // divided by 32
4047 flag = 1 << shft;
Jonas Hahnfeld221e7bb2017-11-22 17:15:20 +00004048 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00004049 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00004050 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
Jonathan Peyton30419822017-05-12 18:01:32 +00004051 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4052 (iter_number << 5) + shft));
Jonathan Peyton71909c52016-03-02 22:42:06 +00004053}
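// The post above publishes an iteration's bit with an atomic fetch-OR,
// preceded by a plain read so that an already-set bit does not trigger a
// redundant atomic RMW. A self-contained sketch of the same idea
// (hypothetical, illustration only; std::atomic stands in for
// KMP_TEST_THEN_OR32). Guarded out of compilation.
#if 0
#include <atomic>
#include <cstdint>
static inline void sketch_post_iter(std::atomic<uint32_t> *flags,
                                    uint64_t iter) {
  uint32_t bit = 1u << (iter % 32); // one bit per iteration
  std::atomic<uint32_t> &word = flags[iter >> 5];
  if ((word.load(std::memory_order_relaxed) & bit) == 0)
    word.fetch_or(bit, std::memory_order_release); // idempotent publish
}
#endif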
4054
Jonathan Peyton30419822017-05-12 18:01:32 +00004055void __kmpc_doacross_fini(ident_t *loc, int gtid) {
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004056 kmp_int32 num_done;
Jonathan Peyton30419822017-05-12 18:01:32 +00004057 kmp_info_t *th = __kmp_threads[gtid];
4058 kmp_team_t *team = th->th.th_team;
4059 kmp_disp_t *pr_buf = th->th.th_dispatch;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004060
Jonathan Peyton30419822017-05-12 18:01:32 +00004061 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4062 if (team->t.t_serialized) {
4063 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4064 return; // nothing to do
4065 }
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004066 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004067 if (num_done == th->th.th_team_nproc) {
4068 // we are the last thread, need to free shared resources
4069 int idx = pr_buf->th_doacross_buf_idx - 1;
4070 dispatch_shared_info_t *sh_buf =
4071 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4072 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4073 (kmp_int64)&sh_buf->doacross_num_done);
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004074 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
Jonathan Peyton30419822017-05-12 18:01:32 +00004075 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004076 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
Jonathan Peyton30419822017-05-12 18:01:32 +00004077 sh_buf->doacross_flags = NULL;
4078 sh_buf->doacross_num_done = 0;
4079 sh_buf->doacross_buf_idx +=
4080 __kmp_dispatch_num_buffers; // free buffer for future re-use
4081 }
4082 // free private resources (need to keep buffer index forever)
Jonathan Peyton369d72d2018-07-30 17:48:33 +00004083 pr_buf->th_doacross_flags = NULL;
Jonathan Peyton30419822017-05-12 18:01:32 +00004084 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4085 pr_buf->th_doacross_info = NULL;
4086 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
Jonathan Peyton71909c52016-03-02 22:42:06 +00004087}
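// An end-to-end sketch (hypothetical lowering; names invented, per-thread
// chunking omitted) of the init/wait/post/fini sequence for a
// one-dimensional doacross loop such as
//   #pragma omp for ordered(1)
//   for (i = 2; i < 9; i += 2) { ... depend(sink: i - 2) ... depend(source) }
// using the inclusive-bounds convention documented above. Out-of-bounds sink
// vectors (here, the first iteration's i - 2) are ignored inside
// __kmpc_doacross_wait() itself. Guarded out of compilation.
#if 0
static void sketch_doacross_lowering(ident_t *loc, int gtid) {
  struct kmp_dim dim;
  dim.lo = 2; // inclusive lower bound
  dim.up = 8; // inclusive upper bound
  dim.st = 2; // stride
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dim);
  for (kmp_int64 i = dim.lo; i <= dim.up; i += dim.st) {
    kmp_int64 vec = i - 2; // depend(sink: i - 2)
    __kmpc_doacross_wait(loc, gtid, &vec);
    /* ... loop body ... */
    vec = i; // depend(source)
    __kmpc_doacross_post(loc, gtid, &vec);
  }
  __kmpc_doacross_fini(loc, gtid);
}
#endif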
4088#endif
4089
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004090#if OMP_50_ENABLED
Andrey Churbanov2d91a8a2018-03-22 18:51:51 +00004091int __kmpc_get_target_offload(void) {
4092 if (!__kmp_init_serial) {
4093 __kmp_serial_initialize();
4094 }
4095 return __kmp_target_offload;
4096}
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004097#endif // OMP_50_ENABLED
4098
Jim Cownie5e8470a2013-09-27 10:38:44 +00004099// end of file //