/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}
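
/* Usage sketch (illustrative only; the ident_t literal below is a hypothetical
   placeholder, real compilers emit their own location records): a program that
   wants eager runtime initialization can call __kmpc_begin() itself.

     static ident_t dummy_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                 ";unknown;unknown;0;0;;"};
     int main() {
       __kmpc_begin(&dummy_loc, 0); // flags are currently ignored
       // ... OpenMP constructs ...
       __kmpc_end(&dummy_loc);
       return 0;
     }
*/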

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
  // root (it can cause library shutdown).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}
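
/* Usage sketch (illustrative): compiler-generated code usually obtains the
   global thread number once per region and forwards it to later __kmpc_*
   entries instead of re-deriving it:

     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     // ... later, e.g. at an explicit barrier:
     __kmpc_barrier(&loc, gtid);
*/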

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}
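
/* Note on the parsing above (format hedged from the kmp.h description of
   psource): loc->psource is a semicolon-separated string, typically of the
   form ";file;routine;line;column;;", so after the two strchr() calls semi2
   points at the ';' closing the file field, semi3 at the ';' closing the
   routine field, and the line number is scanned from the field after semi3.
   The KMP_PAR_RANGE debug feature compares those fields against the
   user-supplied range to decide whether a given parallel region may fork. */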

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}
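
/* Lowering sketch (an assumption about typical compiler output, not a fixed
   contract; outlined_fn and gtid are hypothetical locals): for
       #pragma omp parallel num_threads(4)
   a compiler would emit something like
       __kmpc_push_num_threads(&loc, gtid, 4);
       __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
   The pushed value applies only to the next fork performed by this thread. */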

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start the parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe saving thr_state would be enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
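
/* Lowering sketch (illustrative; outlined_fn and the shared variable are
   hypothetical): for
       int a = 0;
       #pragma omp parallel shared(a)
       { ... }
   a compiler typically produces an outlined microtask plus a fork call:

     void outlined_fn(kmp_int32 *gtid, kmp_int32 *bound_tid, int *a) {
       // body of the parallel region, executed by every thread in the team
     }
     ...
     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &a);

   The first two microtask parameters are supplied by the runtime; the argc
   trailing pointers are forwarded from the ellipsis via the va_list above. */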

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
#endif /* OMP_40_ENABLED */
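
/* Lowering sketch (illustrative; outlined_teams_fn and gtid are hypothetical):
   for
       #pragma omp teams num_teams(2) thread_limit(8)
   a compiler would emit something like
       __kmpc_push_num_teams(&loc, gtid, 2, 8);
       __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_fn);
   When neither clause is present the push call is omitted and
   __kmpc_fork_teams() falls back to __kmp_push_num_teams(loc, gtid, 0, 0)
   as seen above. */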

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}
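
/* Lowering sketch (illustrative; outlined_fn and gtid are hypothetical): for
       #pragma omp parallel if (cond)
   a compiler typically emits
       if (cond) {
         __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
       } else {
         __kmpc_serialized_parallel(&loc, gtid);
         kmp_int32 zero = 0;
         outlined_fn(&gtid, &zero); // run the region body on this thread
         __kmpc_end_serialized_parallel(&loc, gtid);
       }
*/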

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? omp_state_work_serial
                                           : omp_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
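
/* Lowering sketch (illustrative): both
       #pragma omp flush
   and
       #pragma omp flush(x)
   reduce to a single runtime call,
       __kmpc_flush(&loc);
   The optional variable list only restricts the compiler's own reordering;
   on the runtime side the memory fence above is issued either way. */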

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif
}
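
/* Lowering sketch (illustrative; gtid is a hypothetical local): an explicit
       #pragma omp barrier
   becomes
       __kmpc_barrier(&loc, gtid);
   and the same entry point implements the implicit barrier at the end of
   worksharing constructs that lack a nowait clause. */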

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
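
/* Lowering sketch (illustrative; gtid is a hypothetical local): for
       #pragma omp master
       { ... }
   a compiler typically emits
       if (__kmpc_master(&loc, gtid)) {
         // ... body, executed only by the master thread ...
         __kmpc_end_master(&loc, gtid);
       }
   There is no implied barrier; the other threads simply skip the block. */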

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  omp_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = omp_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (omp_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = omp_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
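
/* Lowering sketch (illustrative; gtid is a hypothetical local): within a loop
   scheduled under an ordered clause, the body of
       #pragma omp ordered
       { ... }
   is bracketed as
       __kmpc_ordered(&loc, gtid);
       // ... body, executed in iteration order ...
       __kmpc_end_ordered(&loc, gtid);
*/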

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (                                                                  \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

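/* Reading note: the three macros above inline the uncontended path of the
   test-and-set lock at the call site. A relaxed load first filters out locks
   that are visibly held, a single acquire compare-and-store claims the lock,
   and only contended acquirers fall into the yield/backoff loop.
   KMP_LOCK_FREE(tas) and KMP_LOCK_BUSY(gtid + 1, tas) encode the "free" and
   "held by gtid" values of the poll word. */
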
#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX
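
/* Reading note: the futex variants have the same shape, but a contended
   acquirer tags the poll word (low bit) and then sleeps in the kernel via
   syscall(__NR_futex, ..., FUTEX_WAIT, ...) instead of spinning; the releaser
   swaps the word back to free and issues FUTEX_WAKE only when that waiter tag
   is set. */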

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed with no usage, but that is not
// a problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be reused
// for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  omp_state_t prev_state = omp_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

// since the critical directive binds to all threads, not just the current
// team we have to check this even if we are in a serialized team.
// also, even if we are the uber thread, we still have to conduct the lock,
// as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (omp_wait_id_t)lck;
    ti.state = omp_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (omp_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00001207 ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001208 }
1209 }
1210#endif
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001211 KMP_POP_PARTITIONED_TIMER();
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001212
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001213 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
Jonathan Peyton30419822017-05-12 18:01:32 +00001214 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001215#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001216}
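
/* A rough sketch of how a compiler might lower a `critical` construct onto
   this entry point together with __kmpc_end_critical (defined below); the
   names loc, global_tid, and crit_my_name are illustrative, and the exact
   sequence a real compiler emits may differ. The user code

   @code
   #pragma omp critical (my_name)
   counter++;
   @endcode

   could become

   @code
   static kmp_critical_name crit_my_name = {0}; // zero-initialized, shared
   ...
   __kmpc_critical(&loc, global_tid, &crit_my_name);
   counter++;
   __kmpc_end_critical(&loc, global_tid, &crit_my_name);
   @endcode

   The same kmp_critical_name object must be passed to the matching
   __kmpc_end_critical call so that both resolve to the same underlying
   lock. */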
1217
1218#if KMP_USE_DYNAMIC_LOCK
1219
1220// Converts the given hint to an internal lock implementation
Jonathan Peyton30419822017-05-12 18:01:32 +00001221static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001222#if KMP_USE_TSX
Jonathan Peyton30419822017-05-12 18:01:32 +00001223#define KMP_TSX_LOCK(seq) lockseq_##seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001224#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001225#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001226#endif
Hal Finkel01bb2402016-03-27 13:24:09 +00001227
1228#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00001229#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
Hal Finkel01bb2402016-03-27 13:24:09 +00001230#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001231#define KMP_CPUINFO_RTM 0
Hal Finkel01bb2402016-03-27 13:24:09 +00001232#endif
1233
Jonathan Peyton30419822017-05-12 18:01:32 +00001234 // Hints that do not require further logic
1235 if (hint & kmp_lock_hint_hle)
1236 return KMP_TSX_LOCK(hle);
1237 if (hint & kmp_lock_hint_rtm)
1238 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1239 if (hint & kmp_lock_hint_adaptive)
1240 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001241
Jonathan Peyton30419822017-05-12 18:01:32 +00001242 // Rule out conflicting hints first by returning the default lock
1243 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001244 return __kmp_user_lock_seq;
Jonathan Peyton30419822017-05-12 18:01:32 +00001245 if ((hint & omp_lock_hint_speculative) &&
1246 (hint & omp_lock_hint_nonspeculative))
1247 return __kmp_user_lock_seq;
1248
1249 // Do not even consider speculation when it appears to be contended
1250 if (hint & omp_lock_hint_contended)
1251 return lockseq_queuing;
1252
1253 // Uncontended lock without speculation
1254 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1255 return lockseq_tas;
1256
1257 // HLE lock for speculation
1258 if (hint & omp_lock_hint_speculative)
1259 return KMP_TSX_LOCK(hle);
1260
1261 return __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001262}
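
/* Illustration of the precedence rules above, assuming a TSX-capable build
   (KMP_USE_TSX enabled and RTM reported by the CPU); without TSX every
   KMP_TSX_LOCK case falls back to __kmp_user_lock_seq:

   @code
   __kmp_map_hint_to_lock(kmp_lock_hint_hle);         // -> lockseq_hle
   __kmp_map_hint_to_lock(omp_lock_hint_contended);   // -> lockseq_queuing
   __kmp_map_hint_to_lock(omp_lock_hint_uncontended); // -> lockseq_tas
   __kmp_map_hint_to_lock(omp_lock_hint_contended |
                          omp_lock_hint_uncontended); // conflicting hints ->
                                                      //    __kmp_user_lock_seq
   @endcode
*/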

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return ompt_mutex_impl_unknown;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return ompt_mutex_impl_unknown;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return ompt_mutex_impl_unknown;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
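
/* Sketch of a first-party OMPT tool callback that would receive the
   implementation kind computed above. The parameter list is inferred from
   the call sites in this file (the authoritative typedef is
   ompt_callback_mutex_acquire_t in the OMPT header), so treat this as an
   assumption-laden illustration, not the definitive signature:

   @code
   static void my_mutex_acquire_cb(ompt_mutex_kind_t kind, unsigned int hint,
                                   unsigned int impl, omp_wait_id_t wait_id,
                                   const void *codeptr_ra) {
     if (impl == kmp_mutex_impl_speculative) {
       // the mutex is backed by HLE/RTM speculation
     }
   }
   @endcode

   A tool would register it from its ompt_start_tool initializer via
   ompt_set_callback(ompt_callback_mutex_acquire,
                     (ompt_callback_t)my_mutex_acquire_cb). */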

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uintptr_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  omp_state_t prev_state = omp_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical_with_hint: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (omp_wait_id_t)lck;
      ti.state = omp_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (omp_wait_id_t)lck;
      ti.state = omp_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical_with_hint: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint
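
/* A sketch of the lowering for the OpenMP 4.5 hinted form (illustrative; the
   exact sequence a real compiler emits may differ). The user code

   @code
   #pragma omp critical (spec) hint(omp_lock_hint_speculative)
   update();
   @endcode

   may become

   @code
   static kmp_critical_name crit_spec = {0};
   ...
   __kmpc_critical_with_hint(&loc, global_tid, &crit_spec,
                             omp_lock_hint_speculative);
   update();
   __kmpc_end_critical(&loc, global_tid, &crit_spec);
   @endcode
*/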

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (omp_wait_id_t)crit,
        OMPT_LOAD_RETURN_ADDRESS(global_tid));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
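
/* Sketch of the intended usage pattern for the combined barrier/master entry
   points (hypothetical caller; a compiler decides when a barrier followed by
   a master region can be fused like this):

   @code
   if (__kmpc_barrier_master(&loc, global_tid)) {
     // master block, executed by one thread while the
     // other threads wait inside the split barrier
     __kmpc_end_barrier_master(&loc, global_tid); // releases the waiters
   }
   @endcode
*/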

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the work normally done by an
"end" call is either unnecessary here (no thread waits for the master block
to complete) or performed before this function returns.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there is no __kmpc_end_master call, so the (stats) */
    /* actions of __kmpc_end_master are done here */

    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */

      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls, rather the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
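
/* A sketch of how a compiler might lower a `single` construct onto these
   entry points (illustrative names; per the comment above, the trailing
   barrier is emitted separately and dropped for `nowait`):

   @code
   if (__kmpc_single(&loc, global_tid)) {
     do_the_work(); // executed by exactly one thread
     __kmpc_end_single(&loc, global_tid);
   }
   __kmpc_barrier(&loc, global_tid); // omitted when nowait is specified
   @endcode
*/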

/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_type_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use the default set above;
        // a warning about this case is provided in __kmpc_for_static_init
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}
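
/* A sketch of the init/fini bracket a compiler typically emits around a
   statically scheduled loop; __kmpc_for_static_init_4 is defined elsewhere
   in the runtime, and the argument values here (schedule kind, increment,
   chunk) are illustrative:

   @code
   kmp_int32 last, lower = 0, upper = N - 1, stride = 1;
   __kmpc_for_static_init_4(&loc, global_tid, kmp_sch_static, &last,
                            &lower, &upper, &stride, 1, 1);
   for (kmp_int32 i = lower; i <= upper; ++i)
     body(i);
   __kmpc_for_static_fini(&loc, global_tid);
   @endcode

   Every thread that called the init routine must also reach the matching
   __kmpc_for_static_fini call. */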

// User routines that take C-style arguments (call by value), as distinct
// from the Fortran-callable equivalents.

void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__nested(thread, flag ? TRUE : FALSE);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* TO DO: we want a per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}

void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed
                                          // (e.g. tasks can overwrite the
                                          // location)
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif
}
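
/* A sketch of a cpy_func a compiler might generate for a copyprivate clause
   covering a single int (all names are illustrative):

   @code
   typedef struct { int *v; } cpy_desc_t;

   static void my_cpy_func(void *destination, void *source) {
     cpy_desc_t *dst = (cpy_desc_t *)destination;
     cpy_desc_t *src = (cpy_desc_t *)source;
     *dst->v = *src->v; // copy out of the single thread's variable
   }

   // executed by every thread in the team:
   cpy_desc_t desc = {&my_private_var};
   __kmpc_copyprivate(&loc, gtid, sizeof(desc), &desc, my_cpy_func, didit);
   @endcode

   where didit is 1 only on the thread that executed the single block. */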

/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}

// internal nested lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}

/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
}
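
/* User-level view (OpenMP 4.5): omp_init_lock_with_hint is expected to land
   here, as the OMPT comment above suggests. An illustrative use:

   @code
   omp_lock_t l;
   omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
   omp_set_lock(&l);
   // ... protected work ...
   omp_unset_lock(&l);
   omp_destroy_lock(&l);
   @endcode

   A hint is only a suggestion: __kmp_map_hint_to_lock may fall back to the
   default lock sequence, in which case the lock behaves like a plain
   omp_lock_t. */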
2132
2133/* initialize the lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00002134void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2135 void **user_lock, uintptr_t hint) {
2136 KMP_DEBUG_ASSERT(__kmp_init_serial);
2137 if (__kmp_env_consistency_check && user_lock == NULL) {
2138 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2139 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002140
Jonathan Peyton30419822017-05-12 18:01:32 +00002141 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Joachim Protze82e94a52017-11-01 10:08:30 +00002142
2143#if OMPT_SUPPORT && OMPT_OPTIONAL
2144 // This is the case, if called from omp_init_lock_with_hint:
2145 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2146 if (!codeptr)
2147 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2148 if (ompt_enabled.ompt_callback_lock_init) {
2149 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2150 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
Joachim Protze40636132018-05-28 08:16:08 +00002151 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002152 codeptr);
2153 }
2154#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002155}
2156
2157#endif // KMP_USE_DYNAMIC_LOCK
2158
Jim Cownie5e8470a2013-09-27 10:38:44 +00002159/* initialize the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002160void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002161#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00002162
2163 KMP_DEBUG_ASSERT(__kmp_init_serial);
2164 if (__kmp_env_consistency_check && user_lock == NULL) {
2165 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2166 }
2167 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002168
Joachim Protze82e94a52017-11-01 10:08:30 +00002169#if OMPT_SUPPORT && OMPT_OPTIONAL
2170 // This is the case, if called from omp_init_lock_with_hint:
2171 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2172 if (!codeptr)
2173 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2174 if (ompt_enabled.ompt_callback_lock_init) {
2175 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2176 ompt_mutex_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00002177 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002178 codeptr);
2179 }
2180#endif
2181
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002182#else // KMP_USE_DYNAMIC_LOCK
2183
Jonathan Peyton30419822017-05-12 18:01:32 +00002184 static char const *const func = "omp_init_lock";
2185 kmp_user_lock_p lck;
2186 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002187
Jonathan Peyton30419822017-05-12 18:01:32 +00002188 if (__kmp_env_consistency_check) {
2189 if (user_lock == NULL) {
2190 KMP_FATAL(LockIsUninitialized, func);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002191 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002192 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002193
Jonathan Peyton30419822017-05-12 18:01:32 +00002194 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002195
Jonathan Peyton30419822017-05-12 18:01:32 +00002196 if ((__kmp_user_lock_kind == lk_tas) &&
2197 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2198 lck = (kmp_user_lock_p)user_lock;
2199 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002200#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002201 else if ((__kmp_user_lock_kind == lk_futex) &&
2202 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2203 lck = (kmp_user_lock_p)user_lock;
2204 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002205#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002206 else {
2207 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2208 }
2209 INIT_LOCK(lck);
2210 __kmp_set_user_lock_location(lck, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002211
Joachim Protze82e94a52017-11-01 10:08:30 +00002212#if OMPT_SUPPORT && OMPT_OPTIONAL
2213 // This is the case, if called from omp_init_lock_with_hint:
2214 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2215 if (!codeptr)
2216 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2217 if (ompt_enabled.ompt_callback_lock_init) {
2218 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2219 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze40636132018-05-28 08:16:08 +00002220 (omp_wait_id_t)user_lock, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002221 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002222#endif
2223
Jim Cownie5e8470a2013-09-27 10:38:44 +00002224#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002225 __kmp_itt_lock_creating(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002226#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002227
2228#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002229} // __kmpc_init_lock

/* initialize the nestable lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock
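
/* Editorial note: the lk_tas/lk_futex branches above implement an in-place
   storage optimization. If the lock's poll word (plus depth_locked for the
   nestable case) fits into the user's omp_nest_lock_t object, the lock lives
   directly in user memory; otherwise __kmp_user_lock_allocate() stores a
   pointer to a separately allocated lock there:

     sizeof(poll) + sizeof(depth_locked) <= OMP_NEST_LOCK_T_SIZE -> in place
     otherwise                                                   -> allocate
*/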

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the nestable lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
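
/* Rough sketch (editorial, simplified) of what the KMP_ACQUIRE_TAS_LOCK fast
   path above amounts to; the real macro also backs off and yields while
   spinning:

     kmp_tas_lock_t *l = (kmp_tas_lock_t *)user_lock;
     while (l->lk.poll != free ||
            !KMP_COMPARE_AND_STORE_ACQ32(&l->lk.poll, free, gtid + 1))
       spin/yield;

   i.e. a test-and-set lock whose owner is encoded as gtid + 1 so that the
   free value stays distinct from thread 0. */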

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
          codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
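
/* Editorial note: for a nestable lock the set operation distinguishes taking
   ownership from re-entering a lock the thread already owns; that is what
   acquire_status reports above:

     KMP_LOCK_ACQUIRED_FIRST -> ownership taken now  (OMPT mutex_acquired)
     otherwise               -> nesting depth bumped (OMPT nest_lock, scope_begin)
*/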

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      // lck is not set on this path; report the user lock as the wait id
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* release the nestable lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      // lck is not set on this path; report the user lock as the wait id
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (omp_wait_id_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
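
/* Editorial sketch of the user-level pattern that lands here via
   omp_test_lock() (loc/gtid are supplied by the runtime):

     if (omp_test_lock(&l)) { // __kmpc_test_lock() returned FTN_TRUE
       ... critical work ...
       omp_unset_lock(&l);
     } else {
       ... do useful work instead of blocking ...
     }
*/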

/* try to acquire the nestable lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
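
/* Editorial note: for the nestable variant a nonzero rc is the new nesting
   depth, which is why rc == 1 maps to the OMPT "lock_first" event and rc > 1
   to "lock_next" above; omp_test_nest_lock() returns this depth (0 on
   failure) to the user. */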

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid) \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
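
/* Editorial sketch of the intended pairing: __kmpc_reduce* records the method
   chosen by __kmp_determine_reduction_method() and __kmpc_end_reduce* reads
   it back on the same thread:

     __KMP_SET_REDUCTION_METHOD(gtid, method);  // in __kmpc_reduce_nowait()
     ...
     method = __KMP_GET_REDUCTION_METHOD(gtid); // in __kmpc_end_reduce_nowait()
*/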
Jim Cownie5e8470a2013-09-27 10:38:44 +00003114
Jonathan Peyton30419822017-05-12 18:01:32 +00003115// description of the packed_reduction_method variable: look at the macros in
3116// kmp.h
Jim Cownie5e8470a2013-09-27 10:38:44 +00003117
3118// used in a critical section reduce block
3119static __forceinline void
Jonathan Peyton30419822017-05-12 18:01:32 +00003120__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3121 kmp_critical_name *crit) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003122
Jonathan Peyton30419822017-05-12 18:01:32 +00003123 // this lock was visible to a customer and to the threading profile tool as a
3124 // serial overhead span (although it's used for an internal purpose only)
3125 // why was it visible in previous implementation?
3126 // should we keep it visible in new reduce block?
3127 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003128
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003129#if KMP_USE_DYNAMIC_LOCK
3130
Jonathan Peyton30419822017-05-12 18:01:32 +00003131 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3132 // Check if it is initialized.
3133 if (*lk == 0) {
3134 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3135 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3136 KMP_GET_D_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003137 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00003138 __kmp_init_indirect_csptr(crit, loc, global_tid,
3139 KMP_GET_I_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003140 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003141 }
3142 // Branch for accessing the actual lock object and set operation. This
3143 // branching is inevitable since this lock initialization does not follow the
3144 // normal dispatch path (lock table is not used).
3145 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3146 lck = (kmp_user_lock_p)lk;
3147 KMP_DEBUG_ASSERT(lck != NULL);
3148 if (__kmp_env_consistency_check) {
3149 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3150 }
3151 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3152 } else {
3153 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3154 lck = ilk->lock;
3155 KMP_DEBUG_ASSERT(lck != NULL);
3156 if (__kmp_env_consistency_check) {
3157 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3158 }
3159 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3160 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003161
3162#else // KMP_USE_DYNAMIC_LOCK
3163
Jonathan Peyton30419822017-05-12 18:01:32 +00003164 // We know that the fast reduction code is only emitted by Intel compilers
3165 // with 32 byte critical sections. If there isn't enough space, then we
3166 // have to use a pointer.
3167 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3168 lck = (kmp_user_lock_p)crit;
3169 } else {
3170 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3171 }
3172 KMP_DEBUG_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003173
Jonathan Peyton30419822017-05-12 18:01:32 +00003174 if (__kmp_env_consistency_check)
3175 __kmp_push_sync(global_tid, ct_critical, loc, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003176
Jonathan Peyton30419822017-05-12 18:01:32 +00003177 __kmp_acquire_user_lock_with_checks(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003178
3179#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00003180}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block

#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
#endif
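
/* Editorial usage note: these two helpers are meant to bracket a reduction at
   the teams construct, as __kmpc_reduce_nowait() does below:

     teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
     ... reduce against the parent team ...
     if (teams_swapped)
       __kmp_restore_swapped_teams(th, team, task_state);
*/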

/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
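/* Codegen sketch (editorial; names are illustrative, not emitted by any
   particular compiler): a "reduction(+:sum) nowait" clause typically lowers to

     int ret = __kmpc_reduce_nowait(loc, gtid, 1, sizeof(sum), &sum_priv,
                                    &reduce_func, &crit_name);
     if (ret == 1) {        // this thread combines and finishes the reduction
       sum += sum_priv;
       __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
     } else if (ret == 2) { // every thread combines via an atomic update
       atomic: sum += sum_priv;
     }                      // ret == 0: nothing left for this thread to do

   where reduce_func(lhs, rhs) accumulates *rhs into *lhs. */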
3277kmp_int32
Jonathan Peyton30419822017-05-12 18:01:32 +00003278__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3279 size_t reduce_size, void *reduce_data,
3280 void (*reduce_func)(void *lhs_data, void *rhs_data),
3281 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003282
Jonathan Peyton30419822017-05-12 18:01:32 +00003283 KMP_COUNT_BLOCK(REDUCE_nowait);
3284 int retval = 0;
3285 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003286#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003287 kmp_info_t *th;
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003288 kmp_team_t *team;
Jonathan Peyton30419822017-05-12 18:01:32 +00003289 int teams_swapped = 0, task_state;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003290#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003291 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003292
Jonathan Peyton30419822017-05-12 18:01:32 +00003293 // why do we need this initialization here at all?
3294 // Reduction clause can not be used as a stand-alone directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003295
Jonathan Peyton30419822017-05-12 18:01:32 +00003296 // do not call __kmp_serial_initialize(), it will be called by
3297 // __kmp_parallel_initialize() if needed
3298 // possible detection of false-positive race by the threadchecker ???
3299 if (!TCR_4(__kmp_init_parallel))
3300 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003301
Jonathan Peyton30419822017-05-12 18:01:32 +00003302// check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003303#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00003304 if (__kmp_env_consistency_check)
3305 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003306#else
Jonathan Peyton30419822017-05-12 18:01:32 +00003307 if (__kmp_env_consistency_check)
3308 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003309#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003310
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003311#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003312 th = __kmp_thread_from_gtid(global_tid);
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003313 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003314#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00003315
Jonathan Peyton30419822017-05-12 18:01:32 +00003316 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3317 // the value should be kept in a variable
3318 // the variable should be either a construct-specific or thread-specific
3319 // property, not a team specific property
3320 // (a thread can reach the next reduce block on the next construct, reduce
3321 // method may differ on the next construct)
3322 // an ident_t "loc" parameter could be used as a construct-specific property
3323 // (what if loc == 0?)
3324 // (if both construct-specific and team-specific variables were shared,
3325 // then unness extra syncs should be needed)
3326 // a thread-specific variable is better regarding two issues above (next
3327 // construct and extra syncs)
3328 // a thread-specific "th_local.reduction_method" variable is used currently
3329 // each thread executes 'determine' and 'set' lines (no need to execute by one
3330 // thread, to avoid unness extra syncs)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003331
Jonathan Peyton30419822017-05-12 18:01:32 +00003332 packed_reduction_method = __kmp_determine_reduction_method(
3333 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3334 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003335
Jonathan Peyton30419822017-05-12 18:01:32 +00003336 if (packed_reduction_method == critical_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003337
Jonathan Peyton30419822017-05-12 18:01:32 +00003338 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3339 retval = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003340
Jonathan Peyton30419822017-05-12 18:01:32 +00003341 } else if (packed_reduction_method == empty_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003342
Jonathan Peyton30419822017-05-12 18:01:32 +00003343 // usage: if team size == 1, no synchronization is required ( Intel
3344 // platforms only )
3345 retval = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003346
Jonathan Peyton30419822017-05-12 18:01:32 +00003347 } else if (packed_reduction_method == atomic_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003348
Jonathan Peyton30419822017-05-12 18:01:32 +00003349 retval = 2;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003350
Jonathan Peyton30419822017-05-12 18:01:32 +00003351 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3352 // won't be called by the code gen)
3353 // (this is not ideal: the checking block has already been closed by this
3354 // 'pop', but the atomic operation has not been executed yet; it runs
3355 // slightly later, literally on the next instruction)
3357 if (__kmp_env_consistency_check)
3358 __kmp_pop_sync(global_tid, ct_reduce, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003359
Jonathan Peyton30419822017-05-12 18:01:32 +00003360 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3361 tree_reduce_block)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003362
Jonathan Peyton30419822017-05-12 18:01:32 +00003363// AT: performance issue: a real barrier here
3364// AT: (if the master goes slowly, other threads are blocked here waiting for
3365// the master to come and release them)
3366// AT: (this is not what a customer might expect when specifying the NOWAIT
3367// clause: NOWAIT yields no performance improvement here, which will be
3368// confusing to a customer)
3369// AT: another implementation of *barrier_gather*nowait() (or some other
3370// design) might go faster and be more in line with the intent of NOWAIT
3371// AT: TODO: run the EPCC benchmark and compare times
Jim Cownie5e8470a2013-09-27 10:38:44 +00003372
Jonathan Peyton30419822017-05-12 18:01:32 +00003373// this barrier should be invisible to a customer and to the threading profile
3374// tool (it's neither a terminating barrier nor customer's code, it's
3375// used for an internal purpose)
Joachim Protze82e94a52017-11-01 10:08:30 +00003376#if OMPT_SUPPORT
3377 // JP: can this barrier potentially lead to task scheduling?
3378 // JP: as long as there is a barrier in the implementation, OMPT should and
3379 // will provide the barrier events,
3380 // so we set up the necessary frame/return addresses.
Joachim Protzec5836064b2018-05-28 08:14:58 +00003381 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003382 if (ompt_enabled.enabled) {
3383 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003384 if (ompt_frame->enter_frame == NULL)
3385 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003386 OMPT_STORE_RETURN_ADDRESS(global_tid);
3387 }
3388#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003389#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003390 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003391#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003392 retval =
3393 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3394 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3395 retval = (retval != 0) ? (0) : (1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003396#if OMPT_SUPPORT && OMPT_OPTIONAL
3397 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003398 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003399 }
3400#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401
Jonathan Peyton30419822017-05-12 18:01:32 +00003402 // all workers except the master should do this pop here
3403 // (none of the other workers will get to __kmpc_end_reduce_nowait())
3404 if (__kmp_env_consistency_check) {
3405 if (retval == 0) {
3406 __kmp_pop_sync(global_tid, ct_reduce, loc);
3407 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003408 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003409
3410 } else {
3411
3412 // should never reach this block
3413 KMP_ASSERT(0); // "unexpected method"
3414 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003415#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003416 if (teams_swapped) {
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003417 __kmp_restore_swapped_teams(th, team, task_state);
Jonathan Peyton30419822017-05-12 18:01:32 +00003418 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003419#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003420 KA_TRACE(
3421 10,
3422 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3423 global_tid, packed_reduction_method, retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003424
Jonathan Peyton30419822017-05-12 18:01:32 +00003425 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003426}
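
/* Illustrative sketch (not part of the library): the dispatch pattern a
   compiler might emit around __kmpc_reduce_nowait() for a nowait reduction
   such as reduction(+ : sum). The names reducer, my_lock, loc, gtid, priv
   and sum are hypothetical placeholders.

     static void reducer(void *lhs, void *rhs) { // combine two partial results
       *(double *)lhs += *(double *)rhs;
     }
     static kmp_critical_name my_lock; // backing storage for the reduction lock

     // in the outlined region, after the loop has accumulated into priv:
     switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(double), &priv, reducer,
                                  &my_lock)) {
     case 1: // this thread folds its (possibly tree-combined) value into *sum
       *sum += priv;
       __kmpc_end_reduce_nowait(loc, gtid, &my_lock);
       break;
     case 2: // atomic method chosen: every thread updates *sum atomically
       // e.g. an atomic add of priv into *sum
       break;
     default: // 0: nothing left to do for this thread
       break;
     }
*/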
3427
3428/*!
3429@ingroup SYNCHRONIZATION
3430@param loc source location information
3431@param global_tid global thread id.
3432@param lck pointer to the unique lock data structure
3433
3434Finish the execution of a reduce nowait.
3435*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003436void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3437 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003438
Jonathan Peyton30419822017-05-12 18:01:32 +00003439 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003440
Jonathan Peyton30419822017-05-12 18:01:32 +00003441 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003442
Jonathan Peyton30419822017-05-12 18:01:32 +00003443 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003444
Jonathan Peyton30419822017-05-12 18:01:32 +00003445 if (packed_reduction_method == critical_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003446
Jonathan Peyton30419822017-05-12 18:01:32 +00003447 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003448
Jonathan Peyton30419822017-05-12 18:01:32 +00003449 } else if (packed_reduction_method == empty_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003450
Jonathan Peyton30419822017-05-12 18:01:32 +00003451 // usage: if team size == 1, no synchronization is required (on Intel
3452 // platforms only)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003453
Jonathan Peyton30419822017-05-12 18:01:32 +00003454 } else if (packed_reduction_method == atomic_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003455
Jonathan Peyton30419822017-05-12 18:01:32 +00003456 // neither master nor other workers should get here
3457 // (code gen does not generate this call in case 2: atomic reduce block)
3458 // actually it would be better to remove this else-if entirely;
3459 // after removal this value would be caught by the 'else' and would assert
Jim Cownie5e8470a2013-09-27 10:38:44 +00003460
Jonathan Peyton30419822017-05-12 18:01:32 +00003461 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3462 tree_reduce_block)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003463
Jonathan Peyton30419822017-05-12 18:01:32 +00003464 // only master gets here
Jim Cownie5e8470a2013-09-27 10:38:44 +00003465
Jonathan Peyton30419822017-05-12 18:01:32 +00003466 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003467
Jonathan Peyton30419822017-05-12 18:01:32 +00003468 // should never reach this block
3469 KMP_ASSERT(0); // "unexpected method"
3470 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003471
Jonathan Peyton30419822017-05-12 18:01:32 +00003472 if (__kmp_env_consistency_check)
3473 __kmp_pop_sync(global_tid, ct_reduce, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003474
Jonathan Peyton30419822017-05-12 18:01:32 +00003475 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3476 global_tid, packed_reduction_method));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003477
Jonathan Peyton30419822017-05-12 18:01:32 +00003478 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003479}
3480
3481/* 2.a.ii. Reduce Block with a terminating barrier */
3482
3483/*!
3484@ingroup SYNCHRONIZATION
3485@param loc source location information
3486@param global_tid global thread number
3487@param num_vars number of items (variables) to be reduced
3488@param reduce_size size of data in bytes to be reduced
3489@param reduce_data pointer to data to be reduced
Jonathan Peyton30419822017-05-12 18:01:32 +00003490@param reduce_func callback function providing reduction operation on two
3491operands and returning result of reduction in lhs_data
Jim Cownie5e8470a2013-09-27 10:38:44 +00003492@param lck pointer to the unique lock data structure
Jonathan Peyton30419822017-05-12 18:01:32 +00003493@result 1 for the master thread, 0 for all other team threads, 2 for all team
3494threads if atomic reduction needed
Jim Cownie5e8470a2013-09-27 10:38:44 +00003495
3496A blocking reduce that includes an implicit barrier.
3497*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003498kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3499 size_t reduce_size, void *reduce_data,
3500 void (*reduce_func)(void *lhs_data, void *rhs_data),
3501 kmp_critical_name *lck) {
3502 KMP_COUNT_BLOCK(REDUCE_wait);
3503 int retval = 0;
3504 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003505#if OMP_40_ENABLED
3506 kmp_info_t *th;
3507 kmp_team_t *team;
3508 int teams_swapped = 0, task_state;
3509#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003510
Jonathan Peyton30419822017-05-12 18:01:32 +00003511 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003512
Jonathan Peyton30419822017-05-12 18:01:32 +00003513 // why do we need this initialization here at all?
3514 // A reduction clause cannot be a stand-alone directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003515
Jonathan Peyton30419822017-05-12 18:01:32 +00003516 // do not call __kmp_serial_initialize(); it will be called by
3517 // __kmp_parallel_initialize() if needed
3518 // (possible false-positive race detection by the thread checker ???)
3519 if (!TCR_4(__kmp_init_parallel))
3520 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003521
Jonathan Peyton30419822017-05-12 18:01:32 +00003522// check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003523#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00003524 if (__kmp_env_consistency_check)
3525 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003526#else
Jonathan Peyton30419822017-05-12 18:01:32 +00003527 if (__kmp_env_consistency_check)
3528 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003529#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003530
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003531#if OMP_40_ENABLED
3532 th = __kmp_thread_from_gtid(global_tid);
3533 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3534#endif // OMP_40_ENABLED
3535
Jonathan Peyton30419822017-05-12 18:01:32 +00003536 packed_reduction_method = __kmp_determine_reduction_method(
3537 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3538 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003539
Jonathan Peyton30419822017-05-12 18:01:32 +00003540 if (packed_reduction_method == critical_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003541
Jonathan Peyton30419822017-05-12 18:01:32 +00003542 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3543 retval = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003544
Jonathan Peyton30419822017-05-12 18:01:32 +00003545 } else if (packed_reduction_method == empty_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003546
Jonathan Peyton30419822017-05-12 18:01:32 +00003547 // usage: if team size == 1, no synchronization is required (Intel
3548 // platforms only)
3549 retval = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003550
Jonathan Peyton30419822017-05-12 18:01:32 +00003551 } else if (packed_reduction_method == atomic_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003552
Jonathan Peyton30419822017-05-12 18:01:32 +00003553 retval = 2;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003554
Jonathan Peyton30419822017-05-12 18:01:32 +00003555 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3556 tree_reduce_block)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003557
Jonathan Peyton30419822017-05-12 18:01:32 +00003558// case tree_reduce_block:
3559// this barrier should be visible to a customer and to the threading profile
3560// tool (it's a terminating barrier on constructs if NOWAIT not specified)
Joachim Protze82e94a52017-11-01 10:08:30 +00003561#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00003562 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003563 if (ompt_enabled.enabled) {
3564 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003565 if (ompt_frame->enter_frame == NULL)
3566 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003567 OMPT_STORE_RETURN_ADDRESS(global_tid);
3568 }
3569#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003570#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003571 __kmp_threads[global_tid]->th.th_ident =
3572 loc; // needed for correct notification of frames
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003573#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003574 retval =
3575 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3576 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3577 retval = (retval != 0) ? (0) : (1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003578#if OMPT_SUPPORT && OMPT_OPTIONAL
3579 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003580 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003581 }
3582#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003583
Jonathan Peyton30419822017-05-12 18:01:32 +00003584 // all workers except the master should do this pop here
3585 // (no worker other than the master will enter __kmpc_end_reduce())
3586 if (__kmp_env_consistency_check) {
3587 if (retval == 0) { // 0: all other workers; 1: master
3588 __kmp_pop_sync(global_tid, ct_reduce, loc);
3589 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003590 }
3591
Jonathan Peyton30419822017-05-12 18:01:32 +00003592 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003593
Jonathan Peyton30419822017-05-12 18:01:32 +00003594 // should never reach this block
3595 KMP_ASSERT(0); // "unexpected method"
3596 }
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003597#if OMP_40_ENABLED
3598 if (teams_swapped) {
3599 __kmp_restore_swapped_teams(th, team, task_state);
3600 }
3601#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003602
3603 KA_TRACE(10,
3604 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3605 global_tid, packed_reduction_method, retval));
3606
3607 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003608}
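
/* Illustrative sketch (not part of the library): the blocking form pairs with
   __kmpc_end_reduce(), which supplies the terminating barrier; the same lck
   pointer must be passed to both calls. Placeholder names as in the
   __kmpc_reduce_nowait() sketch above; note that on the atomic path (case 2)
   the code generator is expected to call __kmpc_end_reduce() as well, since
   that is where the terminating barrier for that method is executed.

     switch (__kmpc_reduce(loc, gtid, 1, sizeof(double), &priv, reducer,
                           &my_lock)) {
     case 1: // combine, then release the workers waiting in the barrier
       *sum += priv;
       __kmpc_end_reduce(loc, gtid, &my_lock);
       break;
     case 2: // atomic update, then join the terminating barrier
       // atomic add of priv into *sum
       __kmpc_end_reduce(loc, gtid, &my_lock);
       break;
     default: // 0: worker already released by the barrier
       break;
     }
*/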
3609
3610/*!
3611@ingroup SYNCHRONIZATION
3612@param loc source location information
3613@param global_tid global thread id.
3614@param lck pointer to the unique lock data structure
3615
3616Finish the execution of a blocking reduce.
Jonathan Peyton30419822017-05-12 18:01:32 +00003617The <tt>lck</tt> pointer must be the same as that used in the corresponding
3618start function.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003619*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003620void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3621 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003622
Jonathan Peyton30419822017-05-12 18:01:32 +00003623 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003624#if OMP_40_ENABLED
3625 kmp_info_t *th;
3626 kmp_team_t *team;
3627 int teams_swapped = 0, task_state;
3628#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003629
Jonathan Peyton30419822017-05-12 18:01:32 +00003630 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003631
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003632#if OMP_40_ENABLED
3633 th = __kmp_thread_from_gtid(global_tid);
3634 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3635#endif // OMP_40_ENABLED
3636
Jonathan Peyton30419822017-05-12 18:01:32 +00003637 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003638
Jonathan Peyton30419822017-05-12 18:01:32 +00003639 // this barrier should be visible to a customer and to the threading profile
3640 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003641
Jonathan Peyton30419822017-05-12 18:01:32 +00003642 if (packed_reduction_method == critical_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003643
Jonathan Peyton30419822017-05-12 18:01:32 +00003644 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003645
Jonathan Peyton30419822017-05-12 18:01:32 +00003646// TODO: implicit barrier: should be exposed
Joachim Protze82e94a52017-11-01 10:08:30 +00003647#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00003648 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003649 if (ompt_enabled.enabled) {
3650 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003651 if (ompt_frame->enter_frame == NULL)
3652 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003653 OMPT_STORE_RETURN_ADDRESS(global_tid);
3654 }
3655#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003656#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003657 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003658#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003659 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00003660#if OMPT_SUPPORT && OMPT_OPTIONAL
3661 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003662 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003663 }
3664#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003665
Jonathan Peyton30419822017-05-12 18:01:32 +00003666 } else if (packed_reduction_method == empty_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003667
Jonathan Peyton30419822017-05-12 18:01:32 +00003668// usage: if team size==1, no synchronization is required (Intel platforms only)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003669
Jonathan Peyton30419822017-05-12 18:01:32 +00003670// TODO: implicit barrier: should be exposed
Joachim Protze82e94a52017-11-01 10:08:30 +00003671#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00003672 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003673 if (ompt_enabled.enabled) {
3674 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003675 if (ompt_frame->enter_frame == NULL)
3676 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003677 OMPT_STORE_RETURN_ADDRESS(global_tid);
3678 }
3679#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003680#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003681 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003682#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003683 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00003684#if OMPT_SUPPORT && OMPT_OPTIONAL
3685 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003686 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003687 }
3688#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003689
Jonathan Peyton30419822017-05-12 18:01:32 +00003690 } else if (packed_reduction_method == atomic_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003691
Joachim Protze82e94a52017-11-01 10:08:30 +00003692#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00003693 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00003694 if (ompt_enabled.enabled) {
3695 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00003696 if (ompt_frame->enter_frame == NULL)
3697 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00003698 OMPT_STORE_RETURN_ADDRESS(global_tid);
3699 }
3700#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003701// TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003702#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00003703 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003704#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003705 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00003706#if OMPT_SUPPORT && OMPT_OPTIONAL
3707 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00003708 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00003709 }
3710#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711
Jonathan Peyton30419822017-05-12 18:01:32 +00003712 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3713 tree_reduce_block)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003714
Jonathan Peyton30419822017-05-12 18:01:32 +00003715 // only master executes here (master releases all other workers)
3716 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3717 global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003718
Jonathan Peyton30419822017-05-12 18:01:32 +00003719 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003720
Jonathan Peyton30419822017-05-12 18:01:32 +00003721 // should never reach this block
3722 KMP_ASSERT(0); // "unexpected method"
3723 }
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003724#if OMP_40_ENABLED
3725 if (teams_swapped) {
3726 __kmp_restore_swapped_teams(th, team, task_state);
3727 }
3728#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003729
Jonathan Peyton30419822017-05-12 18:01:32 +00003730 if (__kmp_env_consistency_check)
3731 __kmp_pop_sync(global_tid, ct_reduce, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003732
Jonathan Peyton30419822017-05-12 18:01:32 +00003733 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3734 global_tid, packed_reduction_method));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003735
Jonathan Peyton30419822017-05-12 18:01:32 +00003736 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003737}
3738
3739#undef __KMP_GET_REDUCTION_METHOD
3740#undef __KMP_SET_REDUCTION_METHOD
3741
Jonathan Peyton30419822017-05-12 18:01:32 +00003742/* end of interface to fast scalable reduce routines */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003743
Jonathan Peyton30419822017-05-12 18:01:32 +00003744kmp_uint64 __kmpc_get_taskid() {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003745
Jonathan Peyton30419822017-05-12 18:01:32 +00003746 kmp_int32 gtid;
3747 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003748
Jonathan Peyton30419822017-05-12 18:01:32 +00003749 gtid = __kmp_get_gtid();
3750 if (gtid < 0) {
3751 return 0;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003752 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003753 thread = __kmp_thread_from_gtid(gtid);
3754 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003755
3756} // __kmpc_get_taskid
3757
Jonathan Peyton30419822017-05-12 18:01:32 +00003758kmp_uint64 __kmpc_get_parent_taskid() {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003759
Jonathan Peyton30419822017-05-12 18:01:32 +00003760 kmp_int32 gtid;
3761 kmp_info_t *thread;
3762 kmp_taskdata_t *parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003763
Jonathan Peyton30419822017-05-12 18:01:32 +00003764 gtid = __kmp_get_gtid();
3765 if (gtid < 0) {
3766 return 0;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003767 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003768 thread = __kmp_thread_from_gtid(gtid);
3769 parent_task = thread->th.th_current_task->td_parent;
3770 return (parent_task == NULL ? 0 : parent_task->td_task_id);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003771
3772} // __kmpc_get_parent_taskid
3773
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003774#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00003775/*!
3776@ingroup WORK_SHARING
3777@param loc source location information.
3778@param gtid global thread number.
3779@param num_dims number of associated doacross loops.
3780@param dims info on loops bounds.
3781
3782Initialize doacross loop information.
3783Expect the compiler to send us inclusive bounds,
3784e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
3785*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003786void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
Jonathan Peyton369d72d2018-07-30 17:48:33 +00003787 const struct kmp_dim *dims) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003788 int j, idx;
3789 kmp_int64 last, trace_count;
3790 kmp_info_t *th = __kmp_threads[gtid];
3791 kmp_team_t *team = th->th.th_team;
3792 kmp_uint32 *flags;
3793 kmp_disp_t *pr_buf = th->th.th_dispatch;
3794 dispatch_shared_info_t *sh_buf;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003795
Jonathan Peyton30419822017-05-12 18:01:32 +00003796 KA_TRACE(
3797 20,
3798 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3799 gtid, num_dims, !team->t.t_serialized));
3800 KMP_DEBUG_ASSERT(dims != NULL);
3801 KMP_DEBUG_ASSERT(num_dims > 0);
Jonathan Peyton71909c52016-03-02 22:42:06 +00003802
Jonathan Peyton30419822017-05-12 18:01:32 +00003803 if (team->t.t_serialized) {
3804 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3805 return; // no dependencies if team is serialized
3806 }
3807 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3808 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3809 // the next loop
3810 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
Jonathan Peyton71909c52016-03-02 22:42:06 +00003811
Jonathan Peyton30419822017-05-12 18:01:32 +00003812 // Save bounds info into allocated private buffer
3813 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3814 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3815 th, sizeof(kmp_int64) * (4 * num_dims + 1));
3816 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3817 pr_buf->th_doacross_info[0] =
3818 (kmp_int64)num_dims; // first element is number of dimensions
3819 // Also save the address of num_done so it can be accessed later without
3820 // knowing the buffer index
3821 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3822 pr_buf->th_doacross_info[2] = dims[0].lo;
3823 pr_buf->th_doacross_info[3] = dims[0].up;
3824 pr_buf->th_doacross_info[4] = dims[0].st;
3825 last = 5;
3826 for (j = 1; j < num_dims; ++j) {
3827 kmp_int64
3828 range_length; // To keep ranges of all dimensions but the first dims[0]
3829 if (dims[j].st == 1) { // most common case
3830 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3831 range_length = dims[j].up - dims[j].lo + 1;
3832 } else {
3833 if (dims[j].st > 0) {
3834 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3835 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3836 } else { // negative increment
3837 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3838 range_length =
3839 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3840 }
Jonathan Peyton71909c52016-03-02 22:42:06 +00003841 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003842 pr_buf->th_doacross_info[last++] = range_length;
3843 pr_buf->th_doacross_info[last++] = dims[j].lo;
3844 pr_buf->th_doacross_info[last++] = dims[j].up;
3845 pr_buf->th_doacross_info[last++] = dims[j].st;
3846 }
Jonathan Peyton71909c52016-03-02 22:42:06 +00003847
Jonathan Peyton30419822017-05-12 18:01:32 +00003848 // Compute total trip count.
3849 // Start with range of dims[0] which we don't need to keep in the buffer.
3850 if (dims[0].st == 1) { // most common case
3851 trace_count = dims[0].up - dims[0].lo + 1;
3852 } else if (dims[0].st > 0) {
3853 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3854 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3855 } else { // negative increment
3856 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3857 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3858 }
3859 for (j = 1; j < num_dims; ++j) {
3860 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3861 }
3862 KMP_DEBUG_ASSERT(trace_count > 0);
Jonathan Peyton71909c52016-03-02 22:42:06 +00003863
Jonathan Peyton30419822017-05-12 18:01:32 +00003864 // Check whether the shared buffer is still occupied by the previous loop
3865 // (the one whose buffer index was idx - __kmp_dispatch_num_buffers)
3866 if (idx != sh_buf->doacross_buf_idx) {
3867 // Shared buffer is occupied, wait for it to be free
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003868 __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3869 __kmp_eq_4, NULL);
Jonathan Peyton30419822017-05-12 18:01:32 +00003870 }
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003871#if KMP_32_BIT_ARCH
Jonathan Peyton30419822017-05-12 18:01:32 +00003872 // Check if we are the first thread. After the CAS the first thread gets 0;
3873 // others get 1 while initialization is in progress, or the allocated pointer.
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003874 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3875 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3876 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3877#else
Jonathan Peyton30419822017-05-12 18:01:32 +00003878 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003879 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3880#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003881 if (flags == NULL) {
3882 // we are the first thread, allocate the array of flags
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003883 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
Jonas Hahnfeld221e7bb2017-11-22 17:15:20 +00003884 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3885 KMP_MB();
3886 sh_buf->doacross_flags = flags;
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003887 } else if (flags == (kmp_uint32 *)1) {
3888#if KMP_32_BIT_ARCH
Jonathan Peyton30419822017-05-12 18:01:32 +00003889 // initialization is still in progress, need to wait
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003890 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3891#else
3892 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3893#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003894 KMP_YIELD(TRUE);
Jonas Hahnfeld221e7bb2017-11-22 17:15:20 +00003895 KMP_MB();
3896 } else {
3897 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00003898 }
Andrey Churbanov58acafc2017-11-20 16:00:42 +00003899 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
Jonathan Peyton30419822017-05-12 18:01:32 +00003900 pr_buf->th_doacross_flags =
3901 sh_buf->doacross_flags; // save private copy in order to not
3902 // touch shared buffer on each iteration
3903 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
Jonathan Peyton71909c52016-03-02 22:42:06 +00003904}
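
/* Layout sketch of the private th_doacross_info buffer filled above
   (illustration only; the code above is authoritative):
     [0]                num_dims
     [1]                address of sh_buf->doacross_num_done
     [2], [3], [4]      lo, up, st of dims[0] (its range feeds only trace_count)
     [4*j+1 .. 4*j+4]   range_length, lo, up, st of dims[j], j = 1..num_dims-1
*/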
3905
Jonathan Peyton369d72d2018-07-30 17:48:33 +00003906void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003907 kmp_int32 shft, num_dims, i;
3908 kmp_uint32 flag;
3909 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3910 kmp_info_t *th = __kmp_threads[gtid];
3911 kmp_team_t *team = th->th.th_team;
3912 kmp_disp_t *pr_buf;
3913 kmp_int64 lo, up, st;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003914
Jonathan Peyton30419822017-05-12 18:01:32 +00003915 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3916 if (team->t.t_serialized) {
3917 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3918 return; // no dependencies if team is serialized
3919 }
Jonathan Peyton71909c52016-03-02 22:42:06 +00003920
Jonathan Peyton30419822017-05-12 18:01:32 +00003921 // calculate sequential iteration number and check out-of-bounds condition
3922 pr_buf = th->th.th_dispatch;
3923 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3924 num_dims = pr_buf->th_doacross_info[0];
3925 lo = pr_buf->th_doacross_info[2];
3926 up = pr_buf->th_doacross_info[3];
3927 st = pr_buf->th_doacross_info[4];
3928 if (st == 1) { // most common case
3929 if (vec[0] < lo || vec[0] > up) {
3930 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3931 "bounds [%lld,%lld]\n",
3932 gtid, vec[0], lo, up));
3933 return;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003934 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003935 iter_number = vec[0] - lo;
3936 } else if (st > 0) {
3937 if (vec[0] < lo || vec[0] > up) {
3938 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3939 "bounds [%lld,%lld]\n",
3940 gtid, vec[0], lo, up));
3941 return;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003942 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003943 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3944 } else { // negative increment
3945 if (vec[0] > lo || vec[0] < up) {
3946 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3947 "bounds [%lld,%lld]\n",
3948 gtid, vec[0], lo, up));
3949 return;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003950 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003951 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3952 }
3953 for (i = 1; i < num_dims; ++i) {
3954 kmp_int64 iter, ln;
3955 kmp_int32 j = i * 4;
3956 ln = pr_buf->th_doacross_info[j + 1];
3957 lo = pr_buf->th_doacross_info[j + 2];
3958 up = pr_buf->th_doacross_info[j + 3];
3959 st = pr_buf->th_doacross_info[j + 4];
3960 if (st == 1) {
3961 if (vec[i] < lo || vec[i] > up) {
3962 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3963 "bounds [%lld,%lld]\n",
3964 gtid, vec[i], lo, up));
3965 return;
3966 }
3967 iter = vec[i] - lo;
3968 } else if (st > 0) {
3969 if (vec[i] < lo || vec[i] > up) {
3970 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3971 "bounds [%lld,%lld]\n",
3972 gtid, vec[i], lo, up));
3973 return;
3974 }
3975 iter = (kmp_uint64)(vec[i] - lo) / st;
3976 } else { // st < 0
3977 if (vec[i] > lo || vec[i] < up) {
3978 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3979 "bounds [%lld,%lld]\n",
3980 gtid, vec[i], lo, up));
3981 return;
3982 }
3983 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3984 }
3985 iter_number = iter + ln * iter_number;
3986 }
3987 shft = iter_number % 32; // use 32-bit granularity
3988 iter_number >>= 5; // divided by 32
3989 flag = 1 << shft;
3990 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
3991 KMP_YIELD(TRUE);
3992 }
Jonas Hahnfeld221e7bb2017-11-22 17:15:20 +00003993 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00003994 KA_TRACE(20,
3995 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
3996 gtid, (iter_number << 5) + shft));
Jonathan Peyton71909c52016-03-02 22:42:06 +00003997}
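
/* Worked example of the flag indexing above (illustration only): linearized
   iteration 70 gives shft = 70 % 32 = 6 and word index 70 >> 5 = 2, so the
   waiter spins on bit 6 of pr_buf->th_doacross_flags[2] until
   __kmpc_doacross_post() sets that same bit for iteration 70. */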
3998
Jonathan Peyton369d72d2018-07-30 17:48:33 +00003999void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
Jonathan Peyton30419822017-05-12 18:01:32 +00004000 kmp_int32 shft, num_dims, i;
4001 kmp_uint32 flag;
4002 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4003 kmp_info_t *th = __kmp_threads[gtid];
4004 kmp_team_t *team = th->th.th_team;
4005 kmp_disp_t *pr_buf;
4006 kmp_int64 lo, st;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004007
Jonathan Peyton30419822017-05-12 18:01:32 +00004008 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4009 if (team->t.t_serialized) {
4010 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4011 return; // no dependencies if team is serialized
4012 }
Jonathan Peyton71909c52016-03-02 22:42:06 +00004013
Jonathan Peyton30419822017-05-12 18:01:32 +00004014 // calculate sequential iteration number (same as in "wait" but no
4015 // out-of-bounds checks)
4016 pr_buf = th->th.th_dispatch;
4017 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4018 num_dims = pr_buf->th_doacross_info[0];
4019 lo = pr_buf->th_doacross_info[2];
4020 st = pr_buf->th_doacross_info[4];
4021 if (st == 1) { // most common case
4022 iter_number = vec[0] - lo;
4023 } else if (st > 0) {
4024 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4025 } else { // negative increment
4026 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4027 }
4028 for (i = 1; i < num_dims; ++i) {
4029 kmp_int64 iter, ln;
4030 kmp_int32 j = i * 4;
4031 ln = pr_buf->th_doacross_info[j + 1];
4032 lo = pr_buf->th_doacross_info[j + 2];
4033 st = pr_buf->th_doacross_info[j + 4];
4034 if (st == 1) {
4035 iter = vec[i] - lo;
4036 } else if (st > 0) {
4037 iter = (kmp_uint64)(vec[i] - lo) / st;
4038 } else { // st < 0
4039 iter = (kmp_uint64)(lo - vec[i]) / (-st);
Jonathan Peyton71909c52016-03-02 22:42:06 +00004040 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004041 iter_number = iter + ln * iter_number;
4042 }
4043 shft = iter_number % 32; // use 32-bit granularity
4044 iter_number >>= 5; // divided by 32
4045 flag = 1 << shft;
Jonas Hahnfeld221e7bb2017-11-22 17:15:20 +00004046 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00004047 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00004048 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
Jonathan Peyton30419822017-05-12 18:01:32 +00004049 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4050 (iter_number << 5) + shft));
Jonathan Peyton71909c52016-03-02 22:42:06 +00004051}
4052
Jonathan Peyton30419822017-05-12 18:01:32 +00004053void __kmpc_doacross_fini(ident_t *loc, int gtid) {
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004054 kmp_int32 num_done;
Jonathan Peyton30419822017-05-12 18:01:32 +00004055 kmp_info_t *th = __kmp_threads[gtid];
4056 kmp_team_t *team = th->th.th_team;
4057 kmp_disp_t *pr_buf = th->th.th_dispatch;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004058
Jonathan Peyton30419822017-05-12 18:01:32 +00004059 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4060 if (team->t.t_serialized) {
4061 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4062 return; // nothing to do
4063 }
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004064 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004065 if (num_done == th->th.th_team_nproc) {
4066 // we are the last thread, need to free shared resources
4067 int idx = pr_buf->th_doacross_buf_idx - 1;
4068 dispatch_shared_info_t *sh_buf =
4069 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4070 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4071 (kmp_int64)&sh_buf->doacross_num_done);
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004072 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
Jonathan Peyton30419822017-05-12 18:01:32 +00004073 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004074 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
Jonathan Peyton30419822017-05-12 18:01:32 +00004075 sh_buf->doacross_flags = NULL;
4076 sh_buf->doacross_num_done = 0;
4077 sh_buf->doacross_buf_idx +=
4078 __kmp_dispatch_num_buffers; // free buffer for future re-use
4079 }
4080 // free private resources (need to keep buffer index forever)
Jonathan Peyton369d72d2018-07-30 17:48:33 +00004081 pr_buf->th_doacross_flags = NULL;
Jonathan Peyton30419822017-05-12 18:01:32 +00004082 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4083 pr_buf->th_doacross_info = NULL;
4084 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
Jonathan Peyton71909c52016-03-02 22:42:06 +00004085}
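
/* Illustrative sketch (not part of the library): how a compiler might lower a
   one-dimensional doacross loop onto the entry points above. loc and gtid are
   hypothetical placeholders, and each thread would run only its own chunk of
   the iterations.

     // #pragma omp for ordered(1)
     // for (i = 2; i < 9; i += 2)  ->  lo = 2, up = 8, st = 2 (inclusive)
     struct kmp_dim dim = {2, 8, 2};
     __kmpc_doacross_init(loc, gtid, 1, &dim);
     for (kmp_int64 i = 2; i <= 8; i += 2) {
       kmp_int64 vec = i - 2; // #pragma omp ordered depend(sink : i - 2)
       __kmpc_doacross_wait(loc, gtid, &vec); // block until i-2 is posted
       // ... loop body ...
       vec = i; // #pragma omp ordered depend(source)
       __kmpc_doacross_post(loc, gtid, &vec); // mark iteration i as finished
     }
     __kmpc_doacross_fini(loc, gtid);
*/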
4086#endif
4087
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004088#if OMP_50_ENABLED
Andrey Churbanov2d91a8a2018-03-22 18:51:51 +00004089int __kmpc_get_target_offload(void) {
4090 if (!__kmp_init_serial) {
4091 __kmp_serial_initialize();
4092 }
4093 return __kmp_target_offload;
4094}
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004095#endif // OMP_50_ENABLED
4096
Jim Cownie5e8470a2013-09-27 10:38:44 +00004097// end of file //