/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}
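
/* Illustrative only (not part of the runtime): the shape of a host program
   driving these entry points by hand. The ident_t initializer is a sketch
   based on the ident_t layout in kmp.h; real compilers emit their own
   location records, and "file.c"/"main" are hypothetical names.

     static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file.c;main;1;1;;"};
     int main(void) {
       __kmpc_begin(&loc, 0); // optional; the library self-initializes
       // ... OpenMP constructs ...
       __kmpc_end(&loc); // honored only when KMP_IGNORE_MPPEND=0
       return 0;
     }
*/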

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}
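
/* Illustrative only: a non-OpenMP thread (e.g. one created with pthreads) can
   ask the runtime for its global id; the value is unique across all threads
   known to the runtime, unlike omp_get_thread_num(). A sketch, reusing the
   hypothetical "loc" record from the earlier example:

     void *worker(void *arg) {
       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       printf("registered with the runtime as gtid %d\n", gtid);
       return NULL;
     }
*/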

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}
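
/* Worked example of the parsing above (illustrative values): ident_t::psource
   is a semicolon-separated string of the form ";file;routine;line;...". With
   psource == ";foo.c;bar;12;14;;" the first two strchr calls leave semi2 at
   the ';' after "foo.c", the filename is scanned backwards from semi2 (also
   stopping at '/' so only the basename is compared), semi3 lands at the ';'
   after "bar", and KMP_SSCANF reads 12 as line_no, which is then tested
   against the __kmp_par_range_lb/__kmp_par_range_ub bounds. */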

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}
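
/* Illustrative lowering sketch (not compiler-verified): for
     #pragma omp parallel num_threads(4)
   a compiler typically emits the push immediately before the fork, with
   "outlined_fn" and "shared_var" as hypothetical names:

     __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &shared_var);

   The pushed value applies only to the next fork from this thread and is
   popped automatically (see __kmpc_pop_num_threads below). */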

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
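
/* Illustrative only: the shape of an outlined parallel region as the kmpc_micro
   callback type expects it. The first two parameters are pointers to the
   global and bound thread ids supplied by the runtime; the remaining
   parameters match the varargs passed to __kmpc_fork_call. "outlined_fn" and
   "shared_var" are hypothetical names:

     void outlined_fn(kmp_int32 *gtid, kmp_int32 *btid, int *shared_var) {
       // body of "#pragma omp parallel", executed by every team member
       printf("hello from gtid %d\n", *gtid);
     }
     // call site:
     // __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &shared_var);
*/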

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
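
/* Illustrative lowering sketch (not compiler-verified): for
     #pragma omp teams num_teams(2) thread_limit(8)
   the compiler pushes the clause values, then forks the teams microtask:

     __kmpc_push_num_teams(&loc, gtid, 2, 8);
     __kmpc_fork_teams(&loc, 1, (kmpc_micro)teams_outlined_fn, &shared_var);

   When neither clause is present, __kmpc_fork_teams itself calls
   __kmp_push_num_teams(loc, gtid, 0, 0) to establish defaults (see above). */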
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num);
    }

    // reset clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? omp_state_work_serial
                                           : omp_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction also needs to be addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
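
/* Illustrative only: __kmpc_flush is the runtime entry for "#pragma omp
   flush". A classic (if discouraged) spin-wait that relies on it, with "flag"
   as a hypothetical shared variable:

     while (!flag) {
       __kmpc_flush(&loc); // full fence; makes the remote store to flag visible
     }

   On x86 this reduces to an mfence when SSE2 is available, as implemented
   above. */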

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif
}
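
/* Illustrative lowering sketch: an explicit "#pragma omp barrier" becomes a
   single call,

     __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));

   The same entry point serves the implicit barrier at the end of worksharing
   constructs unless a "nowait" clause removed it. */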

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  omp_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = omp_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (omp_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = omp_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
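
/* Illustrative lowering sketch: inside an ordered loop chunk the compiler
   brackets the ordered block with the pair below; the per-schedule th_deo_fcn
   and th_dxo_fcn hooks seen above let schedule-specific implementations take
   over:

     __kmpc_ordered(&loc, gtid);
     // body of "#pragma omp ordered", executed in iteration order
     __kmpc_end_ordered(&loc, gtid);
*/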

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (                                                                  \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

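/* The macros above are the inlined fast path of a test-and-set lock. A
   minimal freestanding sketch of the same idea (C11 atomics, illustrative
   names only, without the real macros' bounded spinning/backoff/yield):

     #include <stdatomic.h>
     typedef struct { atomic_int poll; } tas_lock_t; // 0 == free

     static void tas_acquire(tas_lock_t *l, int gtid) {
       int free_val = 0, busy_val = gtid + 1;
       // test-and-test-and-set: read cheaply, CAS only when it looks free
       while (atomic_load_explicit(&l->poll, memory_order_relaxed) != free_val ||
              !atomic_compare_exchange_weak_explicit(
                  &l->poll, &free_val, busy_val, memory_order_acquire,
                  memory_order_relaxed)) {
         free_val = 0; // a failed CAS clobbers the expected value; reset it
       }
     }
     static void tas_release(tas_lock_t *l) {
       atomic_store_explicit(&l->poll, 0, memory_order_release);
     }

   The runtime's versions additionally yield to the OS when the machine is
   oversubscribed (__kmp_nth exceeds the available processors). */
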
#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by user but rather setting
// name for object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be reused
// for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}
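
/* The allocate-then-CAS sequence above is a general lock-free "publish once"
   idiom: every racing thread builds a candidate, exactly one CAS succeeds,
   and the losers free their candidate and adopt the winner's. A minimal
   sketch of the shape (C11 atomics; obj_t, make_obj and destroy_obj are
   hypothetical):

     obj_t *get_or_publish(_Atomic(obj_t *) *slot) {
       obj_t *obj = atomic_load(slot);
       if (obj == NULL) {
         obj_t *mine = make_obj(); // build a candidate
         obj_t *expected = NULL;
         if (atomic_compare_exchange_strong(slot, &expected, mine)) {
           obj = mine; // we published first
         } else {
           destroy_obj(mine); // someone beat us; adopt theirs
           obj = expected;    // CAS failure loaded the winner's pointer
         }
       }
       return obj;
     }
*/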
1124
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001125#endif // KMP_USE_DYNAMIC_LOCK
1126
Jim Cownie5e8470a2013-09-27 10:38:44 +00001127/*!
1128@ingroup WORK_SHARING
1129@param loc source location information.
1130@param global_tid global thread number .
Jonathan Peyton30419822017-05-12 18:01:32 +00001131@param crit identity of the critical section. This could be a pointer to a lock
1132associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001133
1134Enter code protected by a `critical` construct.
1135This function blocks until the executing thread can enter the critical section.
1136*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  omp_state_t prev_state = omp_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // Since the critical directive binds to all threads, not just the current
  // team, we have to check this even if we are in a serialized team.
  // Also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (omp_wait_id_t)lck;
    ti.state = omp_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (omp_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
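
// Worked examples for the mapping above (these follow directly from the
// branches; results assume no TSX-specific hint bits are set):
//   __kmp_map_hint_to_lock(omp_lock_hint_contended) == lockseq_queuing
//   __kmp_map_hint_to_lock(omp_lock_hint_uncontended) == lockseq_tas
//   __kmp_map_hint_to_lock(omp_lock_hint_contended | omp_lock_hint_uncontended)
//       == __kmp_user_lock_seq  // conflicting hints fall back to the default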

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return ompt_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return ompt_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return ompt_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
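
Illustrative sketch of an assumed lowering for a hinted critical (placeholder
names, not prescribed codegen):
@code
// #pragma omp critical hint(omp_lock_hint_speculative)
__kmpc_critical_with_hint(&loc, gtid, &crit_lock, omp_lock_hint_speculative);
do_protected_work();
__kmpc_end_critical(&loc, gtid, &crit_lock); // exit is the same as without a hint
@endcode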
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  omp_state_t prev_state = omp_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (omp_wait_id_t)lck;
      ti.state = omp_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (omp_wait_id_t)lck;
      ti.state = omp_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
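
Illustrative only: each critical section carries its own identity, so the exit
must use the same <tt>crit</tt> object as the matching entry (placeholder
names below):
@code
__kmpc_critical(&loc, gtid, &crit_A);
update_shared_state(); // hypothetical protected work
__kmpc_end_critical(&loc, gtid, &crit_A); // must not pass a different identity
@endcode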
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
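
Illustrative usage sketch (assumed codegen, placeholder names):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  do_master_work(); // executed by exactly one thread
  __kmpc_end_barrier_master(&loc, gtid); // releases the threads still waiting
}
@endcode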
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the construct has nowait
semantics: the other threads do not wait for the master block to complete.
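
Illustrative usage sketch (assumed codegen, placeholder names):
@code
if (__kmpc_barrier_master_nowait(&loc, gtid)) {
  do_master_work(); // the other threads have already continued past the barrier
}
@endcode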
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */

    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */

      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if it is required.
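
Illustrative lowering sketch for a <tt>single</tt> region (assumed codegen;
the trailing barrier is the compiler's responsibility and would be omitted for
a nowait clause):
@code
if (__kmpc_single(&loc, gtid)) {
  do_single_work(); // executed by the first thread to arrive
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid);
@endcode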
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
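
Illustrative pairing with the matching init call (a sketch of assumed codegen
for a static loop, not quoted from any compiler; N is a placeholder trip
count):
@code
kmp_int32 last = 0, lower = 0, upper = N - 1, stride = 1; // full iteration space
__kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower, &upper,
                         &stride, 1, 1);
for (kmp_int32 i = lower; i <= upper; ++i)
  loop_body(i); // each thread now runs only its own chunk
__kmpc_for_static_fini(&loc, gtid);
@endcode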
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // Use the default set above; a warning about this case is provided in
        // __kmpc_for_static_init.
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}

// User routines which take C-style arguments (call by value)
// different from the Fortran equivalent routines

void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__nested(thread, flag ? TRUE : FALSE);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* TO DO */
  /* we want per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
1869
Jonathan Peyton6d88e042018-12-13 23:14:24 +00001870#if OMP_50_ENABLED
1871/* OpenMP 5.0 Affinity Format API */
1872
1873void ompc_set_affinity_format(char const *format) {
1874 if (!__kmp_init_serial) {
1875 __kmp_serial_initialize();
1876 }
1877 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1878 format, KMP_STRLEN(format) + 1);
1879}
1880
1881size_t ompc_get_affinity_format(char *buffer, size_t size) {
1882 size_t format_size;
1883 if (!__kmp_init_serial) {
1884 __kmp_serial_initialize();
1885 }
1886 format_size = KMP_STRLEN(__kmp_affinity_format);
1887 if (buffer && size) {
1888 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1889 format_size + 1);
1890 }
1891 return format_size;
1892}
1893
1894void ompc_display_affinity(char const *format) {
1895 int gtid;
1896 if (!TCR_4(__kmp_init_middle)) {
1897 __kmp_middle_initialize();
1898 }
1899 gtid = __kmp_get_gtid();
1900 __kmp_aux_display_affinity(gtid, format);
1901}
1902
1903size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1904 char const *format) {
1905 int gtid;
1906 size_t num_required;
1907 kmp_str_buf_t capture_buf;
1908 if (!TCR_4(__kmp_init_middle)) {
1909 __kmp_middle_initialize();
1910 }
1911 gtid = __kmp_get_gtid();
1912 __kmp_str_buf_init(&capture_buf);
1913 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1914 if (buffer && buf_size) {
1915 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1916 capture_buf.used + 1);
1917 }
1918 __kmp_str_buf_free(&capture_buf);
1919 return num_required;
1920}
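
// Illustrative caller pattern for the buffer-returning routines above (an
// assumed usage mirroring snprintf-style sizing, not part of this file):
//   size_t needed = ompc_capture_affinity(NULL, 0, format); // query size only
//   char *buf = (char *)malloc(needed + 1);
//   ompc_capture_affinity(buf, needed + 1, format); // now fill the buffer
// ompc_get_affinity_format() supports the same NULL-buffer size query.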
#endif /* OMP_50_ENABLED */

void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
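
As a hedged illustration, a copy routine that broadcasts a single scalar could
look like this (cpy_data would then point at each thread's local variable):
@code
void cpy_func(void *destination, void *source) {
  *(int *)destination = *(int *)source; // copy the single thread's value
}
@endcode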
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  // TODO: check if this is needed (e.g. tasks can overwrite the location)
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif
}

/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}

/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
}
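
// Illustrative entry path (an assumption about typical use, not a guarantee):
// a user call such as
//   omp_lock_t lck;
//   omp_init_lock_with_hint(&lck, omp_lock_hint_contended);
// is expected to reach this function under KMP_USE_DYNAMIC_LOCK, with the hint
// translated by __kmp_map_hint_to_lock() before the lock storage is set up.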

/* initialize the nested lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_nest_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
}

#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
2295
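/* Editor's note (a sketch, assuming the usual lowering of the user-level lock
   API onto the __kmpc_* entry points): plain

       omp_lock_t l;
       omp_init_lock(&l);

   corresponds to __kmpc_init_lock(loc, gtid, (void **)&l). With
   KMP_USE_DYNAMIC_LOCK the lock sequence defaults to __kmp_user_lock_seq;
   otherwise the TAS/futex cases above reuse the user's storage when the lock
   object fits in OMP_LOCK_T_SIZE, and larger lock kinds are allocated via
   __kmp_user_lock_allocate(). */
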
/* initialize the nested lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the nested lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

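/* Editor's usage sketch (illustrative): an initialized lock should be
   destroyed exactly once, and only while unlocked, e.g.

       omp_nest_lock_t nl;
       omp_init_nest_lock(&nl);
       // ... set/unset pairs ...
       omp_destroy_nest_lock(&nl);

   Destroying a held or never-initialized lock is undefined behavior at the
   user level; with consistency checking enabled (__kmp_env_consistency_check)
   the non-dynamic path above diagnoses bad handles through
   __kmp_lookup_user_lock(). */
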
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

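/* Editor's usage sketch (illustrative): __kmpc_set_lock() blocks until the
   lock is acquired, so the usual user-level pattern

       omp_set_lock(&l);
       // ... critical work ...
       omp_unset_lock(&l);

   maps onto __kmpc_set_lock()/__kmpc_unset_lock() pairs. Note that on the
   dynamic-lock path the inlined TAS/futex fast paths are bypassed whenever
   consistency checking is enabled; the checked case dispatches through
   __kmp_direct_set[] instead. */
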
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
          codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

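/* Editor's note (sketch): a nestable lock may be re-acquired by the thread
   that already owns it; each acquisition bumps an internal depth counter.
   The OMPT logic above distinguishes the two cases: KMP_LOCK_ACQUIRED_FIRST
   reports ompt_callback_mutex_acquired ("lock_first"), while a re-acquire
   reports ompt_callback_nest_lock with ompt_scope_begin ("lock_next"), e.g.

       omp_set_nest_lock(&nl);   // depth 1 -> mutex_acquired
       omp_set_nest_lock(&nl);   // depth 2 -> nest_lock(scope_begin)
       omp_unset_nest_lock(&nl);
       omp_unset_nest_lock(&nl);
*/
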
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      // use user_lock here: lck is not assigned on this fast path
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* release the nested lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      // use user_lock here: lck is not assigned on this fast path
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (omp_wait_id_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

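/* Editor's note (sketch): on the Linux/x86-family fast path above, release is
   just a depth decrement plus an ordinary store of 0 to the poll word when
   the depth reaches zero, followed by KMP_MB(); no atomic RMW appears to be
   needed there because only the owning thread may legally unset a nested
   lock it holds. Only the depth-zero transition (KMP_LOCK_RELEASED) reports
   ompt_callback_mutex_released; intermediate releases report
   ompt_callback_nest_lock with ompt_scope_end, mirroring the acquire-side
   lock_first/lock_next split in __kmpc_set_nest_lock(). */
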
/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

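/* Editor's usage sketch (illustrative): __kmpc_test_lock() returns FTN_TRUE
   on a successful non-blocking acquire, so the classic polling idiom

       while (!omp_test_lock(&l)) {
         do_other_work(); // hypothetical helper, not part of this runtime
       }
       // lock held here
       omp_unset_lock(&l);

   lands here rather than in __kmpc_set_lock(). Note that OMPT reports
   mutex_acquire before the attempt and mutex_acquired only on success. */
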
/* try to acquire the nested lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

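/* Editor's note (sketch): for nestable locks the test function returns the
   new nesting depth on success and 0 on failure, matching the semantics of
   omp_test_nest_lock(). The rc == 1 check above therefore separates the
   first acquisition (mutex_acquired) from a re-acquisition (nest_lock with
   ompt_scope_begin). */
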
// Interface to fast scalable reduce method routines

// keep the selected method in a thread-local structure for cross-function
// usage: it will be used in the __kmpc_end_reduce* functions;
// an alternative: re-determine the method once more in the
// __kmpc_end_reduce* functions (a new prototype would be required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

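/* Editor's sketch (assuming the usual pairing in this file): the 'set' macro
   is invoked once the method has been chosen in __kmpc_reduce_nowait() /
   __kmpc_reduce(), and the 'get' macro recovers it in the matching
   __kmpc_end_reduce*() call, roughly:

       packed_reduction_method = __kmp_determine_reduction_method(...);
       __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
       // ... later, in __kmpc_end_reduce_nowait():
       packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
*/
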
// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block

#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
#endif

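/* Editor's note (sketch): the two helpers above are meant to bracket a
   reduction that executes at teams level. A caller checks the return value
   and restores on the same path, e.g. in __kmpc_reduce_nowait():

       teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team,
                                                            &task_state);
       // ... perform the reduction against the parent team ...
       if (teams_swapped) {
         __kmp_restore_swapped_teams(th, team, task_state);
       }
*/
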
Jim Cownie5e8470a2013-09-27 10:38:44 +00003328/* 2.a.i. Reduce Block without a terminating barrier */
3329/*!
3330@ingroup SYNCHRONIZATION
3331@param loc source location information
3332@param global_tid global thread number
3333@param num_vars number of items (variables) to be reduced
3334@param reduce_size size of data in bytes to be reduced
3335@param reduce_data pointer to data to be reduced
Jonathan Peyton30419822017-05-12 18:01:32 +00003336@param reduce_func callback function providing reduction operation on two
3337operands and returning result of reduction in lhs_data
Jim Cownie5e8470a2013-09-27 10:38:44 +00003338@param lck pointer to the unique lock data structure
Jonathan Peyton30419822017-05-12 18:01:32 +00003339@result 1 for the master thread, 0 for all other team threads, 2 for all team
3340threads if atomic reduction needed
Jim Cownie5e8470a2013-09-27 10:38:44 +00003341
3342The nowait version is used for a reduce clause with the nowait argument.
3343*/
3344kmp_int32
Jonathan Peyton30419822017-05-12 18:01:32 +00003345__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3346 size_t reduce_size, void *reduce_data,
3347 void (*reduce_func)(void *lhs_data, void *rhs_data),
3348 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003349
Jonathan Peyton30419822017-05-12 18:01:32 +00003350 KMP_COUNT_BLOCK(REDUCE_nowait);
3351 int retval = 0;
3352 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003353#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003354 kmp_info_t *th;
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003355 kmp_team_t *team;
Jonathan Peyton30419822017-05-12 18:01:32 +00003356 int teams_swapped = 0, task_state;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003357#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003358 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003359
Jonathan Peyton30419822017-05-12 18:01:32 +00003360 // why do we need this initialization here at all?
3361 // Reduction clause can not be used as a stand-alone directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003362
Jonathan Peyton30419822017-05-12 18:01:32 +00003363 // do not call __kmp_serial_initialize(), it will be called by
3364 // __kmp_parallel_initialize() if needed
3365 // possible detection of false-positive race by the threadchecker ???
3366 if (!TCR_4(__kmp_init_parallel))
3367 __kmp_parallel_initialize();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // The packed_reduction_method value will be reused by the __kmp_end_reduce*
  // functions, so it must be kept in a variable. The variable should be a
  // construct-specific or thread-specific property, not a team-specific one:
  // a thread can reach the next reduce block on the next construct, and the
  // reduce method may differ there. The ident_t "loc" parameter could serve as
  // a construct-specific property (but what if loc == 0?), and if a shared
  // construct-specific or team-specific variable were used, extra
  // synchronization would be needed. A thread-specific variable avoids both
  // issues (the next construct and the extra syncs), so a thread-specific
  // "th_local.reduction_method" variable is used currently. Each thread
  // executes the 'determine' and 'set' lines itself; there is no need to have
  // a single thread do it, which would require unnecessary extra syncs.

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (this is not ideal: the checking block is closed by this 'pop' even
    // though the atomic operation has not been executed yet; it will be
    // executed slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if the master goes slow, other threads are blocked here waiting for
// the master to come and release them)
// AT: (that's not what a customer might expect when specifying the NOWAIT
// clause)
// AT: (specifying NOWAIT won't result in improved performance; it will be
// confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other
// design) might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: run the EPCC benchmark and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code; it's used for
// an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary
    // frame/return addresses.
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

    // all workers except the master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
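
/* Usage sketch (illustrative only, not part of the runtime): roughly how a
   compiler might lower "reduction(+:sum)" combined with NOWAIT onto the two
   entry points above; exact code generation varies by compiler. The names
   reduce_sum_func, reduce_lock, private_sum and shared_sum are hypothetical.

   static kmp_critical_name reduce_lock; // compiler-generated lock storage
   static void reduce_sum_func(void *lhs_data, void *rhs_data) {
     *(float *)lhs_data += *(float *)rhs_data; // combine two partial sums
   }

   // inside the outlined parallel region, after computing private_sum:
   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(float), &private_sum,
                                reduce_sum_func, &reduce_lock)) {
   case 1: // combine the partial result, then close the reduce block
     shared_sum += private_sum;
     __kmpc_end_reduce_nowait(loc, gtid, &reduce_lock);
     break;
   case 2: // atomic method: each thread updates shared_sum atomically
     // (e.g. via an __kmpc_atomic_* entry); no end call is generated
     break;
   default: // 0: a tree-reduce worker; nothing left to combine
     break;
   }
*/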

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor the other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it would be better to remove this else-if entirely;
    // after removal this value would be caught by the 'else' and would assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // A reduction clause cannot be a stand-alone directive.

  // do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of a false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

    // all workers except the master should do this pop here
    // (none of the workers other than the master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
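
/* Usage sketch (illustrative only): lowering a reduction without NOWAIT is
   the same as the __kmpc_reduce_nowait() sketch above, except that the
   blocking entry points are used and __kmpc_end_reduce() supplies the
   terminating barrier, so it is also called on the combining paths:

   switch (__kmpc_reduce(loc, gtid, 1, sizeof(float), &private_sum,
                         reduce_sum_func, &reduce_lock)) {
   case 1: // combine, then let the runtime run the terminating barrier
     shared_sum += private_sum;
     __kmpc_end_reduce(loc, gtid, &reduce_lock);
     break;
   case 2: // atomic update of shared_sum, then the same end call
     __kmpc_end_reduce(loc, gtid, &reduce_lock);
     break;
   default: // 0: a tree-reduce worker, already released by the barrier
     break;
   }
*/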

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size == 1, no synchronization is required (Intel platforms
// only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
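
/* Illustrative only: these two entry points let user or tool code sample task
   identity from inside a task region; when called outside the runtime
   (gtid < 0) both return 0.

   kmp_uint64 id = __kmpc_get_taskid();
   kmp_uint64 parent = __kmpc_get_parent_taskid();
   printf("task %llu (parent %llu)\n", (unsigned long long)id,
          (unsigned long long)parent);
*/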

#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loop bounds.

Initialize doacross loop information.
The compiler is expected to send us inclusive bounds,
e.g. for (i = 2; i < 9; i += 2) gives lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Also save the address of num_done in order to access it later without
  // knowing the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check whether the shared buffer is still occupied by the previous loop
  // (whose index would be idx - __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
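
/* Usage sketch (illustrative only, not part of the runtime): one plausible
   per-thread lowering of a one-dimensional doacross loop; the exact code a
   compiler emits may differ, and the chunk bounds below are hypothetical.

     #pragma omp for ordered(1)
     for (i = 2; i < 9; i += 2) {
       #pragma omp ordered depend(sink : i - 2)
       ... consume the result of iteration i - 2 ...
       #pragma omp ordered depend(source)
     }

   becomes roughly (bounds are inclusive, as documented above):

   struct kmp_dim dims = {2, 8, 2}; // lo = 2, up = 8, st = 2 (assumed order)
   __kmpc_doacross_init(loc, gtid, 1, &dims);
   for (i = chunk_lo; i <= chunk_up; i += 2) {
     kmp_int64 vec = i - 2;
     __kmpc_doacross_wait(loc, gtid, &vec); // block until the sink is posted
     ... loop body ...
     vec = i;
     __kmpc_doacross_post(loc, gtid, &vec); // mark this iteration done
   }
   __kmpc_doacross_fini(loc, gtid);
*/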

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
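
/* Worked example (illustrative only) of the flag addressing shared by
   __kmpc_doacross_wait() and __kmpc_doacross_post() above: the vector of loop
   indices is linearized into a single iteration number, which then selects
   one bit of the th_doacross_flags array.

   For a two-dimensional nest whose second dimension has ln1 iterations:
     iter_number = iter0 * ln1 + iter1
   If, say, iter_number == 70:
     shft = 70 % 32 = 6   -> flag = 1 << 6 = 0x40
     70 >> 5 = 2          -> the bit lives in th_doacross_flags[2]
   post() sets that bit with an atomic OR; wait() spins (yielding) until the
   bit is set. */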

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
#endif

#if OMP_50_ENABLED
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}
#endif // OMP_50_ENABLED

// end of file //