/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
  // root (it can cause library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}
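
// Illustrative sketch (an assumption, not tied to any particular compiler): a
// compiler may bracket a program's execution with these optional calls, e.g.
//   int main() {
//     __kmpc_begin(&loc, 0); // `loc` is a hypothetical compiler-emitted ident_t
//     /* ... user code ... */
//     __kmpc_end(&loc);
//   }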

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), so this call counts all such threads even
if they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active
parallel construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

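// Illustrative sketch (assumption): for `#pragma omp parallel num_threads(4)`
// a compiler typically emits something like
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &shared);
// where `loc`, `outlined` and `shared` are hypothetical compiler-generated
// names.
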
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

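// Illustrative sketch (assumption): the `microtask` passed above is a
// compiler-outlined function whose first two parameters point to the global
// and bound thread ids, followed by the shared-variable pointers from the
// ellipsis, e.g.
//   void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *shared) {
//     /* ... body of the parallel region ... */
//   }
//   __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &shared);
// `loc`, `outlined` and `shared` are hypothetical names.
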
#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
#endif /* OMP_40_ENABLED */

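// Illustrative sketch (assumption): for
//   #pragma omp teams num_teams(2) thread_limit(8)
// a compiler would typically emit
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_teams(&loc, gtid, 2, 8);
//   __kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined);
// `loc` and `teams_outlined` are hypothetical names.
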
// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

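// Illustrative sketch (assumption): for `#pragma omp parallel if (cond)` a
// compiler can branch at run time, e.g.
//   if (cond) {
//     __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &zero); // run the region on the encountering thread
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }
// `loc`, `outlined`, `gtid` and `zero` are hypothetical names.
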
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

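// Illustrative sketch (assumption): `#pragma omp flush` is typically lowered
// to a single runtime call:
//   __kmpc_flush(&loc); // `loc` is a hypothetical compiler-emitted ident_t
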
/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

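// Illustrative sketch (assumption): `#pragma omp barrier` lowers to
//   __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
// with `loc` a hypothetical compiler-emitted ident_t for the source location.
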
/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

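// Illustrative sketch (assumption): `#pragma omp master` lowers to a guarded
// region such as
//   if (__kmpc_master(&loc, gtid)) {
//     /* ... master-only code ... */
//     __kmpc_end_master(&loc, gtid);
//   }
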
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

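// Illustrative sketch (assumption): inside an ordered loop a compiler emits
//   __kmpc_ordered(&loc, gtid);
//   /* ... body of the `#pragma omp ordered` block ... */
//   __kmpc_end_ordered(&loc, gtid);
// so iterations pass through the block in sequential order.
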
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (                                                                  \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      // Let ITT know the lock is destroyed and the same memory location may be
      // reused for another purpose.
      __kmp_itt_critical_destroyed(lck);
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

// Since the critical directive binds to all threads, not just the current
// team, we have to check this even if we are in a serialized team.
// Also, even if we are the uber thread, we still have to acquire the lock,
// as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}
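
/* For reference, a sketch of how a compiler might lower a named critical
   construct onto the entry points above (an assumed lowering, not tied to any
   particular compiler; 'crit_name' is illustrative):
@code
static kmp_critical_name crit_name; // zero-filled; the first thread through
                                    // installs the underlying lock

// ... inside the outlined parallel-region body, where 'loc' and
// 'global_tid' are supplied by the compiler:
__kmpc_critical(loc, global_tid, &crit_name); // blocks until acquired
// user code protected by the critical construct
__kmpc_end_critical(loc, global_tid, &crit_name);
@endcode
*/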

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
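
/* Illustrative expectations for the mapping above on a TSX-capable build
   (KMP_USE_TSX set and __kmp_cpuinfo.rtm nonzero); other configurations fall
   back to __kmp_user_lock_seq:
@code
__kmp_map_hint_to_lock(omp_lock_hint_contended);   // -> lockseq_queuing
__kmp_map_hint_to_lock(omp_lock_hint_uncontended); // -> lockseq_tas
__kmp_map_hint_to_lock(omp_lock_hint_speculative); // -> lockseq_hle
__kmp_map_hint_to_lock(omp_lock_hint_contended |
                       omp_lock_hint_uncontended); // conflicting hints
                                                   // -> __kmp_user_lock_seq
@endcode
*/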

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical_with_hint: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical_with_hint: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint
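
/* Source-level view: with the OpenMP 4.5 'hint' clause the compiler passes
   the hint value straight through to __kmpc_critical_with_hint. A minimal
   user-code sketch ('counter' and the name 'updates' are illustrative):
@code
#pragma omp parallel
  {
#pragma omp critical(updates) hint(omp_lock_hint_speculative)
    counter++; // may run speculatively when TSX hardware is available
  }
@endcode
*/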

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)crit,
        OMPT_LOAD_RETURN_ADDRESS(global_tid));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}
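
/* Assumed lowering (sketch): a compiler can fuse a barrier immediately
   followed by a master region into this combined entry point:
@code
if (__kmpc_barrier_master(loc, global_tid)) {
  // master-only code
  __kmpc_end_barrier_master(loc, global_tid); // releases the waiting threads
}
@endcode
*/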

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the nowait variant leaves no
threads waiting to be released after the master block completes.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */

    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */

      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}
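
/* Assumed lowering (sketch) for the nowait variant; no matching "end" call is
   emitted because no threads are left waiting:
@code
if (__kmpc_barrier_master_nowait(loc, global_tid)) {
  // master-only code; execution simply falls through at the end
}
@endcode
*/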

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}
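
/* Assumed lowering (sketch) of '#pragma omp single': as the comment above
   notes, the barrier is emitted explicitly by the compiler and is omitted
   when 'nowait' is present:
@code
if (__kmpc_single(loc, global_tid)) {
  // single-thread code
  __kmpc_end_single(loc, global_tid);
}
__kmpc_barrier(loc, global_tid); // omitted for 'single nowait'
@endcode
*/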

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use default set above.
        // a warning about this case is provided in __kmpc_for_static_init
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}

// User routines which take C-style arguments (call by value)
// different from the Fortran equivalent routines

void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__nested(thread, flag ? TRUE : FALSE);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* TO DO: we want a per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}

#if OMP_50_ENABLED
/* OpenMP 5.0 Affinity Format API */

void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
#endif /* OMP_50_ENABLED */
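
/* User-level view of the entry points above through the OpenMP 5.0 API; a
   minimal sketch using standard 5.0 affinity-format field names:
@code
#include <omp.h>

void show_binding(void) {
  omp_set_affinity_format("thread %{thread_num} bound to %{thread_affinity}");
#pragma omp parallel
  omp_display_affinity(NULL); // NULL selects the stored format
}
@endcode
*/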

void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed
                                          // (e.g. tasks can overwrite the
                                          // location)
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
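
/* A minimal cpy_func sketch matching the interface documented above. Here
   each thread's cpy_data is assumed to point directly at a fixed-size
   payload; PAYLOAD_SIZE is a placeholder, not a runtime constant:
@code
#include <string.h>

#define PAYLOAD_SIZE 64

static void example_cpy_func(void *destination, void *source) {
  // destination: the calling thread's buffer; source: the single thread's
  memcpy(destination, source, PAYLOAD_SIZE);
}
@endcode
*/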

/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}

/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
}

/* initialize the nest lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
}

#endif // KMP_USE_DYNAMIC_LOCK

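/* User-level view (OpenMP 4.5): omp_init_lock_with_hint reaches the entry
   point above. The hint is advisory, so programs must not depend on which
   lock implementation is chosen. A minimal sketch:
@code
#include <omp.h>

omp_lock_t lk;

void init_speculative(void) {
  omp_init_lock_with_hint(&lk, omp_lock_hint_speculative);
}
@endcode
*/
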
/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

Joachim Protze82e94a52017-11-01 10:08:30 +00002258#if OMPT_SUPPORT && OMPT_OPTIONAL
2259 // This is the case, if called from omp_init_lock_with_hint:
2260 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2261 if (!codeptr)
2262 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2263 if (ompt_enabled.ompt_callback_lock_init) {
2264 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2265 ompt_mutex_lock, omp_lock_hint_none,
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002266 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002267 codeptr);
2268 }
2269#endif
2270
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002271#else // KMP_USE_DYNAMIC_LOCK
2272
Jonathan Peyton30419822017-05-12 18:01:32 +00002273 static char const *const func = "omp_init_lock";
2274 kmp_user_lock_p lck;
2275 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002276
Jonathan Peyton30419822017-05-12 18:01:32 +00002277 if (__kmp_env_consistency_check) {
2278 if (user_lock == NULL) {
2279 KMP_FATAL(LockIsUninitialized, func);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002280 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002281 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002282
Jonathan Peyton30419822017-05-12 18:01:32 +00002283 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002284
Jonathan Peyton30419822017-05-12 18:01:32 +00002285 if ((__kmp_user_lock_kind == lk_tas) &&
2286 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2287 lck = (kmp_user_lock_p)user_lock;
2288 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002289#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002290 else if ((__kmp_user_lock_kind == lk_futex) &&
2291 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2292 lck = (kmp_user_lock_p)user_lock;
2293 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002294#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002295 else {
2296 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2297 }
2298 INIT_LOCK(lck);
2299 __kmp_set_user_lock_location(lck, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002300
Joachim Protze82e94a52017-11-01 10:08:30 +00002301#if OMPT_SUPPORT && OMPT_OPTIONAL
2302 // This is the case, if called from omp_init_lock_with_hint:
2303 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2304 if (!codeptr)
2305 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2306 if (ompt_enabled.ompt_callback_lock_init) {
2307 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2308 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002309 (ompt_wait_id_t)user_lock, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002310 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002311#endif
2312
Jim Cownie5e8470a2013-09-27 10:38:44 +00002313#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002314 __kmp_itt_lock_creating(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002315#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002316
2317#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002318} // __kmpc_init_lock
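
// Worked example of the storage scheme above (sketch): with a 4-byte
// omp_lock_t and __kmp_user_lock_kind == lk_tas, the TAS poll word fits
// inside the user's lock variable, so lck aliases user_lock directly; with a
// larger lock kind (e.g., lk_queuing) __kmp_user_lock_allocate() creates the
// lock object elsewhere and *user_lock holds the handle that later lookups
// resolve via __kmp_lookup_user_lock().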

/* initialize the nested lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the nested lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
          codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_unset_lock:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      // Note: lck is not yet assigned on this fast path; report the user's
      // lock address as the wait id.
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* release the nested lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_unset_nest_lock:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      // Note: lck is not yet assigned on this fast path; report the user's
      // lock address as the wait id.
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
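
// Hedged sketch of the nested-lock callback pairing implemented above
// (illustrative, not normative):
//   omp_set_nest_lock(&l);   // first acquire -> ompt_callback_mutex_acquired
//   omp_set_nest_lock(&l);   // re-acquire    -> ompt_callback_nest_lock(ompt_scope_begin)
//   omp_unset_nest_lock(&l); // inner release -> ompt_callback_nest_lock(ompt_scope_end)
//   omp_unset_nest_lock(&l); // last release  -> ompt_callback_mutex_released
// The first/last cases are distinguished via KMP_LOCK_ACQUIRED_FIRST and
// KMP_LOCK_RELEASED in the entry points above.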

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
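
// Hedged usage sketch (illustrative, not part of the runtime): the
// omp_test_lock() wrapper forwards here, so the nonblocking result can drive
// a try-then-work loop such as
//   while (!__kmpc_test_lock(&loc, gtid, (void **)&l))
//     do_other_work(); // hypothetical helper
// where FTN_TRUE means the lock was acquired and FTN_FALSE means it was not.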

/* try to acquire the nested lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread-local structure for cross-function
// usage: it will be used in the __kmpc_end_reduce* functions;
// another solution: re-determine the method one more time in the
// __kmpc_end_reduce* functions (a new prototype would be required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid) \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
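
// Illustrative pairing (sketch): __kmpc_reduce* selects a method and caches it
//   __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
// and the matching __kmpc_end_reduce* retrieves it later with
//   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
// so the begin and end halves agree without re-running the selection logic.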

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in the previous implementation?
  // should we keep it visible in the new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block

#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside a teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is a reduction at the teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
#endif
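
// Hedged sketch of how a compiler typically drives the reduce entry points
// below (variable names and the concrete atomic entry are illustrative):
//   // #pragma omp parallel for reduction(+ : s), with s_priv the private copy
//   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(s_priv), &s_priv,
//                                reducer_fn, &crit)) {
//   case 1: // this thread combines its private copy, then closes the block
//     s += s_priv;
//     __kmpc_end_reduce_nowait(loc, gtid, &crit);
//     break;
//   case 2: // all threads combine atomically instead
//     __kmpc_atomic_fixed4_add(loc, gtid, &s, s_priv);
//     break;
//   default: // 0: nothing to do for this thread
//     break;
//   }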

/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // Why do we need this initialization here at all?
  // A reduction clause cannot be used as a stand-alone directive.

  // Do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed.
  // Possible detection of a false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // The packed_reduction_method value will be reused by the __kmp_end_reduce*
  // functions, so it must be kept in a variable. The variable should be either
  // a construct-specific or a thread-specific property, not a team-specific
  // property: a thread can reach the next reduce block on the next construct,
  // and the reduce method may differ there. An ident_t "loc" parameter could
  // serve as a construct-specific property, but what if loc == 0? And if both
  // construct-specific and team-specific variables were shared, unnecessary
  // extra syncs would be needed. A thread-specific variable avoids both
  // issues, so a thread-specific "th_local.reduction_method" variable is used
  // currently. Each thread executes the 'determine' and 'set' lines below; no
  // need to restrict this to one thread, which would only add unnecessary
  // extra syncs.

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // All threads should do this pop here, because __kmpc_end_reduce_nowait()
    // won't be called by the code gen for this case. This is not quite clean:
    // the checking block is closed by this 'pop' although the atomic
    // operation has not been executed yet; it will be executed slightly
    // later, literally on the next instruction.
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if the master goes slow, other threads are blocked here waiting for
// the master to come and release them)
// AT: (this is not what a customer might expect when specifying the NOWAIT
// clause: NOWAIT won't result in improved performance, which will be
// confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other
// design) might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: do an EPCC test and compare times

// This barrier should be invisible to a customer and to the threading profile
// tool (it is neither a terminating barrier nor customer's code; it is used
// for an internal purpose).
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should
    // and will provide the barrier events, so we set up the necessary
    // frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // All other workers except the master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait()).
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
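
// Illustrative sketch (not part of the runtime): the call sequence a compiler
// might emit for a worksharing loop with 'reduction(+:sum) nowait'. The
// reducer 'example_plus_reduce', the lock 'example_lock' and the variable
// names are hypothetical; the switch on the return value (1/2/0) follows the
// contract documented above.
//
//   static kmp_critical_name example_lock = {0};
//
//   static void example_plus_reduce(void *lhs_data, void *rhs_data) {
//     *(int *)lhs_data += *(int *)rhs_data; // combine two partial sums
//   }
//
//   // called by every thread of the team with its private partial sum
//   void example_reduce_tail(ident_t *loc, kmp_int32 gtid, int *sum,
//                            int priv_sum) {
//     switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &priv_sum,
//                                  example_plus_reduce, &example_lock)) {
//     case 1: // critical/tree path: combine here and close the construct
//       *sum += priv_sum;
//       __kmpc_end_reduce_nowait(loc, gtid, &example_lock);
//       break;
//     case 2: // atomic path: every thread combines atomically, no 'end' call
//       __sync_fetch_and_add(sum, priv_sum);
//       break;
//     default: // 0: other workers on the tree path, nothing left to do
//       break;
//     }
//   }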

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

  } else if (packed_reduction_method == atomic_reduce_block) {

    // Neither the master nor the other workers should get here (the code gen
    // does not generate this call in case 2: atomic reduce block). Actually
    // it would be better to remove this 'else if' altogether; after removal
    // the value would be caught by the final 'else' and would assert.

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // Why do we need this initialization here at all?
  // A reduction clause cannot be a stand-alone directive.

  // Do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed.
  // Possible detection of a false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// This barrier should be visible to a customer and to the threading profile
// tool (it is a terminating barrier on constructs if NOWAIT is not specified).
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // All other workers except the master should do this pop here
    // (none of the other workers except the master will enter
    // __kmpc_end_reduce()).
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // This barrier should be visible to a customer and to the threading profile
  // tool (it is a terminating barrier on constructs if NOWAIT is not
  // specified).

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size == 1, no synchronization is required (Intel platforms
// only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // Only the master executes here (the master releases all other workers).
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
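
// Illustrative sketch (not part of the runtime): the blocking variant is used
// when the reduction ends at an implicit barrier (no 'nowait'). The shape a
// compiler might emit mirrors the nowait sketch above; names are reused from
// that hypothetical example. Note that __kmpc_end_reduce() must receive the
// same 'lck' as __kmpc_reduce(), and here it is also the call that performs
// the terminating barrier for the critical and atomic paths.
//
//   int ret = __kmpc_reduce(loc, gtid, 1, sizeof(int), &priv_sum,
//                           example_plus_reduce, &example_lock);
//   if (ret == 1) { // combine, then close the construct (releases lock/barrier)
//     *sum += priv_sum;
//     __kmpc_end_reduce(loc, gtid, &example_lock);
//   } else if (ret == 2) { // atomic path: combine atomically, then barrier
//     __sync_fetch_and_add(sum, priv_sum);
//     __kmpc_end_reduce(loc, gtid, &example_lock);
//   } // ret == 0: tree-path workers were already released by the barrier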

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
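
// Illustrative sketch (not part of the runtime): these two entry points can
// be called from user code for ad-hoc task tracing; both return 0 when there
// is no task context (assumes <stdio.h> for the printf).
//
//   kmp_uint64 task = __kmpc_get_taskid();
//   kmp_uint64 parent = __kmpc_get_parent_taskid();
//   printf("task %llu (parent %llu)\n", (unsigned long long)task,
//          (unsigned long long)parent);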

#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on the bounds of the loops.

Initialize doacross loop information.
The compiler is expected to send us inclusive bounds,
e.g. for (i = 2; i < 9; i += 2) lo = 2, up = 8, st = 2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into the allocated private buffer.
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Also save the address of num_done so it can be accessed later without
  // knowing the buffer index.
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count.
  // Start with the range of dims[0], which we don't need to keep in the
  // buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check whether the shared buffer is still occupied by another loop (loop
  // number idx - __kmp_dispatch_num_buffers).
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer
  // otherwise. Treat the pointer as a volatile integer (value 0 or 1) until
  // memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save a private copy so we do not touch the
  // shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
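
// Illustrative sketch (not part of the runtime): for the doc-comment example
// 'for (i = 2; i < 9; i += 2)' a compiler would pass one dimension with
// inclusive bounds before entering the loop:
//
//   struct kmp_dim dims[1];
//   dims[0].lo = 2; // first iteration
//   dims[0].up = 8; // last iteration, inclusive (not 9)
//   dims[0].st = 2; // loop increment
//   __kmpc_doacross_init(loc, gtid, 1, dims);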

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
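
// Illustrative sketch (not part of the runtime): inside the body of the loop
// from the init sketch above, code a compiler might emit for
// '#pragma omp ordered depend(sink: i-2)' followed by
// '#pragma omp ordered depend(source)':
//
//   kmp_int64 vec[1];
//   vec[0] = i - 2; // the iteration we depend on
//   __kmpc_doacross_wait(loc, gtid, vec); // returns at once if out of bounds
//   ... // cross-iteration work for iteration i
//   vec[0] = i;
//   __kmpc_doacross_post(loc, gtid, vec); // mark iteration i as completed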

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
#endif

#if OMP_50_ENABLED
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
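
// Illustrative sketch (not part of the runtime): a hypothetical user-level
// caller, assuming the kmp_soft_paused enumerator from kmp.h; a non-zero
// return means the request was rejected (e.g. the runtime is not yet
// initialized, as above).
//
//   if (__kmpc_pause_resource(kmp_soft_paused) != 0) {
//     // could not pause; continue normally
//   }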
#endif // OMP_50_ENABLED

// end of file //