/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister
  // this root (which can cause library shutdown).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // The normal exit process on Windows does not allow worker threads of the
  // final parallel region to finish reporting their events, so shutting down
  // the library here fixes the issue at least for the cases where __kmpc_end()
  // is placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, because the runtime keeps
underlying threads alive even when they are not active (since the cost of
creating and destroying OS threads is high), this call counts all such threads
even if they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
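
As a hedged sketch only (not the output of any particular compiler), a
construct such as `#pragma omp parallel num_threads(4)` might be lowered
roughly as follows, where `outlined` is a hypothetical microtask and `loc` is
the compiler-generated ident_t for the construct:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_threads(&loc, gtid, 4);
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
@endcode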
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
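
As a hedged illustration (actual compiler output differs in detail), a region
such as `#pragma omp parallel shared(x)` incrementing `x` might be outlined
into a microtask and launched like this; `outlined`, `x`, and `loc` are
hypothetical names:
@code
void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *x) { (*x)++; }
// ... at the parallel construct:
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x);
@endcode
Here argc is 1 because exactly one shared-variable pointer follows the
microtask argument.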
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start the parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe saving thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
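
A hedged sketch of the call sequence a compiler might emit for
`#pragma omp teams num_teams(2) thread_limit(8)`; `outlined_teams` is a
hypothetical microtask name and `loc` the construct's ident_t:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_teams(&loc, gtid, 2, 8);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams);
@endcode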
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams was called; otherwise set the default
  // number of teams
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  __kmp_free(tmp);
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
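
A hedged sketch of how a compiler might guard a conditional region with the
serialized entry/exit pair; `outlined`, `condition`, and `loc` are
hypothetical names:
@code
if (condition) {
  __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
} else {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_serialized_parallel(&loc, gtid);
  kmp_int32 zero = 0;
  outlined(&gtid, &zero); // run the region body on the encountering thread
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode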
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
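
A hedged sketch: a compiler would typically lower `#pragma omp flush` to a
single runtime call (with `loc` the construct's ident_t):
@code
__kmpc_flush(&loc);
@endcode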
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need an explicit __mf() here since the library uses volatile instead */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is addressed as well (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here, move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  // while (!flag) {
  //   #pragma omp flush(flag)
  // }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
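
A hedged sketch: `#pragma omp barrier` typically lowers to a single call,
@code
__kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
@endcode
(in practice the compiler usually passes a cached global thread number).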
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when the 'barrier' directive is present or as the
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
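
A hedged sketch of the guard a compiler might emit for `#pragma omp master`;
note that __kmpc_end_master() is called only when this function returned 1:
@code
if (__kmpc_master(&loc, gtid)) {
  // ... body of the master region ...
  __kmpc_end_master(&loc, gtid);
}
@endcode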
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
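
A hedged sketch of the bracketing a compiler might emit for an ordered region
inside an ordered loop body:
@code
__kmpc_ordered(&loc, gtid);
// ... statements of the ordered region ...
__kmpc_end_ordered(&loc, gtid);
@endcode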
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001097
1098#else // KMP_USE_DYNAMIC_LOCK
1099
Jonathan Peyton30419822017-05-12 18:01:32 +00001100static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1101 ident_t const *loc,
1102 kmp_int32 gtid) {
1103 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001104
Jonathan Peyton30419822017-05-12 18:01:32 +00001105 // Because of the double-check, the following load doesn't need to be volatile
1106 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001107
Jonathan Peyton30419822017-05-12 18:01:32 +00001108 if (lck == NULL) {
1109 void *idx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001110
Jonathan Peyton30419822017-05-12 18:01:32 +00001111 // Allocate & initialize the lock.
1112 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1113 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1114 __kmp_init_user_lock_with_checks(lck);
1115 __kmp_set_user_lock_location(lck, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001116#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001117 __kmp_itt_critical_creating(lck);
1118// __kmp_itt_critical_creating() should be called *before* the first usage
1119// of underlying lock. It is the only place where we can guarantee it. There
1120// are chances the lock will destroyed with no usage, but it is not a
1121// problem, because this is not real event seen by user but rather setting
1122// name for object (lock). See more details in kmp_itt.h.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001123#endif /* USE_ITT_BUILD */
1124
Jonathan Peyton30419822017-05-12 18:01:32 +00001125 // Use a cmpxchg instruction to slam the start of the critical section with
1126 // the lock pointer. If another thread beat us to it, deallocate the lock,
1127 // and use the lock that the other thread allocated.
1128 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001129
Jonathan Peyton30419822017-05-12 18:01:32 +00001130 if (status == 0) {
1131// Deallocate the lock and reload the value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001132#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001133 __kmp_itt_critical_destroyed(lck);
1134// Let ITT know the lock is destroyed and the same memory location may be reused
1135// for another purpose.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001136#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001137 __kmp_destroy_user_lock_with_checks(lck);
1138 __kmp_user_lock_free(&idx, gtid, lck);
1139 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1140 KMP_DEBUG_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001141 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001142 }
1143 return lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001144}
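// The function above is an instance of the lazy allocate/compare-and-swap/
// free idiom for initializing a shared pointer without a lock. A generic
// sketch of the same pattern (illustrative only; widget_alloc, widget_free
// and compare_and_swap_ptr are hypothetical names):
/*
  widget_t *get_or_create(widget_t **slot) {
    widget_t *w = *slot; // racy read, validated by the CAS below
    if (w == NULL) {
      widget_t *fresh = widget_alloc(); // every racer may allocate one
      if (compare_and_swap_ptr(slot, NULL, fresh)) {
        w = fresh; // we won the race; our object is now published
      } else {
        widget_free(fresh); // we lost; discard our copy and
        w = *slot;          // reload the winner's pointer
      }
    }
    return w;
  }
*/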
1145
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001146#endif // KMP_USE_DYNAMIC_LOCK
1147
Jim Cownie5e8470a2013-09-27 10:38:44 +00001148/*!
1149@ingroup WORK_SHARING
1150@param loc source location information.
@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001152@param crit identity of the critical section. This could be a pointer to a lock
1153associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001154
1155Enter code protected by a `critical` construct.
1156This function blocks until the executing thread can enter the critical section.
1157*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001158void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1159 kmp_critical_name *crit) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001160#if KMP_USE_DYNAMIC_LOCK
Joachim Protze82e94a52017-11-01 10:08:30 +00001161#if OMPT_SUPPORT && OMPT_OPTIONAL
1162 OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001164 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001165#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001166 KMP_COUNT_BLOCK(OMP_CRITICAL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001167#if OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001168 ompt_state_t prev_state = ompt_state_undefined;
Joachim Protze82e94a52017-11-01 10:08:30 +00001169 ompt_thread_info_t ti;
1170#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001171 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001172
Jonathan Peyton30419822017-05-12 18:01:32 +00001173 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001174
Jonathan Peyton30419822017-05-12 18:01:32 +00001175 // TODO: add THR_OVHD_STATE
Jim Cownie5e8470a2013-09-27 10:38:44 +00001176
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001177 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
Jonathan Peyton30419822017-05-12 18:01:32 +00001178 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001179
Jonathan Peyton30419822017-05-12 18:01:32 +00001180 if ((__kmp_user_lock_kind == lk_tas) &&
1181 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1182 lck = (kmp_user_lock_p)crit;
1183 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001184#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001185 else if ((__kmp_user_lock_kind == lk_futex) &&
1186 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1187 lck = (kmp_user_lock_p)crit;
1188 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001189#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001190 else { // ticket, queuing or drdpa
1191 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1192 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001193
Jonathan Peyton30419822017-05-12 18:01:32 +00001194 if (__kmp_env_consistency_check)
1195 __kmp_push_sync(global_tid, ct_critical, loc, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001196
// Since the critical directive binds to all threads, not just the current
// team, we have to check this even if we are in a serialized team.
// Also, even if we are the uber thread, we still have to acquire the lock,
// as we have to contend with sibling threads.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001201
1202#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001203 __kmp_itt_critical_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001204#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001205#if OMPT_SUPPORT && OMPT_OPTIONAL
1206 OMPT_STORE_RETURN_ADDRESS(gtid);
1207 void *codeptr_ra = NULL;
1208 if (ompt_enabled.enabled) {
1209 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1210 /* OMPT state update */
1211 prev_state = ti.state;
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001212 ti.wait_id = (ompt_wait_id_t)lck;
1213 ti.state = ompt_state_wait_critical;
Joachim Protze82e94a52017-11-01 10:08:30 +00001214
1215 /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1217 if (ompt_enabled.ompt_callback_mutex_acquire) {
1218 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1219 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001220 (ompt_wait_id_t)crit, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001221 }
1222 }
1223#endif
  // The value of 'crit' is usable as the critical_id of the critical section
  // directive.
1226 __kmp_acquire_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001227
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001228#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001229 __kmp_itt_critical_acquired(lck);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001230#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001231#if OMPT_SUPPORT && OMPT_OPTIONAL
1232 if (ompt_enabled.enabled) {
1233 /* OMPT state update */
1234 ti.state = prev_state;
1235 ti.wait_id = 0;
1236
1237 /* OMPT event callback */
1238 if (ompt_enabled.ompt_callback_mutex_acquired) {
1239 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001240 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001241 }
1242 }
1243#endif
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001244 KMP_POP_PARTITIONED_TIMER();
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001245
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001246 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
Jonathan Peyton30419822017-05-12 18:01:32 +00001247 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001248#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001249}
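// A sketch of how a compiler typically lowers an unnamed
// `#pragma omp critical` onto this entry point (illustrative only; crit_name
// stands in for the compiler-mangled kmp_critical_name):
/*
  static kmp_critical_name crit_name; // zero-initialized, one per construct

  __kmpc_critical(&loc, global_tid, &crit_name);
  ... // protected region
  __kmpc_end_critical(&loc, global_tid, &crit_name);
*/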
1250
1251#if KMP_USE_DYNAMIC_LOCK
1252
1253// Converts the given hint to an internal lock implementation
Jonathan Peyton30419822017-05-12 18:01:32 +00001254static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001255#if KMP_USE_TSX
Jonathan Peyton30419822017-05-12 18:01:32 +00001256#define KMP_TSX_LOCK(seq) lockseq_##seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001257#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001258#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001259#endif
Hal Finkel01bb2402016-03-27 13:24:09 +00001260
1261#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00001262#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
Hal Finkel01bb2402016-03-27 13:24:09 +00001263#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001264#define KMP_CPUINFO_RTM 0
Hal Finkel01bb2402016-03-27 13:24:09 +00001265#endif
1266
Jonathan Peyton30419822017-05-12 18:01:32 +00001267 // Hints that do not require further logic
1268 if (hint & kmp_lock_hint_hle)
1269 return KMP_TSX_LOCK(hle);
1270 if (hint & kmp_lock_hint_rtm)
1271 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1272 if (hint & kmp_lock_hint_adaptive)
1273 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001274
Jonathan Peyton30419822017-05-12 18:01:32 +00001275 // Rule out conflicting hints first by returning the default lock
1276 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001277 return __kmp_user_lock_seq;
Jonathan Peyton30419822017-05-12 18:01:32 +00001278 if ((hint & omp_lock_hint_speculative) &&
1279 (hint & omp_lock_hint_nonspeculative))
1280 return __kmp_user_lock_seq;
1281
1282 // Do not even consider speculation when it appears to be contended
1283 if (hint & omp_lock_hint_contended)
1284 return lockseq_queuing;
1285
1286 // Uncontended lock without speculation
1287 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1288 return lockseq_tas;
1289
1290 // HLE lock for speculation
1291 if (hint & omp_lock_hint_speculative)
1292 return KMP_TSX_LOCK(hle);
1293
1294 return __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001295}
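// A few concrete resolutions of the mapping above, assuming a build where
// KMP_USE_TSX and KMP_CPUINFO_RTM hold (illustrative only):
//
//   omp_lock_hint_contended                    -> lockseq_queuing
//   omp_lock_hint_uncontended                  -> lockseq_tas
//   omp_lock_hint_speculative                  -> lockseq_hle
//   omp_lock_hint_contended | omp_lock_hint_uncontended
//                                              -> __kmp_user_lock_seq
//                                                 (conflicting hints)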
1296
Joachim Protze82e94a52017-11-01 10:08:30 +00001297#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001298#if KMP_USE_DYNAMIC_LOCK
Joachim Protze1b2bd262018-01-17 10:06:01 +00001299static kmp_mutex_impl_t
Joachim Protze82e94a52017-11-01 10:08:30 +00001300__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1301 if (user_lock) {
1302 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1303 case 0:
1304 break;
1305#if KMP_USE_FUTEX
1306 case locktag_futex:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001307 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001308#endif
1309 case locktag_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001310 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001311#if KMP_USE_TSX
1312 case locktag_hle:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001313 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001314#endif
1315 default:
Joachim Protze2b46d302019-01-15 15:36:53 +00001316 return kmp_mutex_impl_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001317 }
1318 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1319 }
1320 KMP_ASSERT(ilock);
1321 switch (ilock->type) {
1322#if KMP_USE_TSX
1323 case locktag_adaptive:
1324 case locktag_rtm:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001325 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001326#endif
1327 case locktag_nested_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001328 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001329#if KMP_USE_FUTEX
1330 case locktag_nested_futex:
1331#endif
1332 case locktag_ticket:
1333 case locktag_queuing:
1334 case locktag_drdpa:
1335 case locktag_nested_ticket:
1336 case locktag_nested_queuing:
1337 case locktag_nested_drdpa:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001338 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001339 default:
Joachim Protze2b46d302019-01-15 15:36:53 +00001340 return kmp_mutex_impl_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001341 }
1342}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001343#else
Joachim Protze82e94a52017-11-01 10:08:30 +00001344// For locks without dynamic binding
Joachim Protze1b2bd262018-01-17 10:06:01 +00001345static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
Joachim Protze82e94a52017-11-01 10:08:30 +00001346 switch (__kmp_user_lock_kind) {
1347 case lk_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001348 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001349#if KMP_USE_FUTEX
1350 case lk_futex:
1351#endif
1352 case lk_ticket:
1353 case lk_queuing:
1354 case lk_drdpa:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001355 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001356#if KMP_USE_TSX
1357 case lk_hle:
1358 case lk_rtm:
1359 case lk_adaptive:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001360 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001361#endif
1362 default:
Joachim Protze2b46d302019-01-15 15:36:53 +00001363 return kmp_mutex_impl_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001364 }
1365}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001366#endif // KMP_USE_DYNAMIC_LOCK
1367#endif // OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze82e94a52017-11-01 10:08:30 +00001368
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001369/*!
1370@ingroup WORK_SHARING
1371@param loc source location information.
1372@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001373@param crit identity of the critical section. This could be a pointer to a lock
1374associated with the critical section, or some other suitably unique value.
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001375@param hint the lock hint.
1376
Jonathan Peyton30419822017-05-12 18:01:32 +00001377Enter code protected by a `critical` construct with a hint. The hint value is
1378used to suggest a lock implementation. This function blocks until the executing
1379thread can enter the critical section unless the hint suggests use of
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001380speculative execution and the hardware supports it.
1381*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001382void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
Jonathan Peytona2f6eff2018-09-07 18:46:40 +00001383 kmp_critical_name *crit, uint32_t hint) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001384 KMP_COUNT_BLOCK(OMP_CRITICAL);
1385 kmp_user_lock_p lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001386#if OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001387 ompt_state_t prev_state = ompt_state_undefined;
Joachim Protze82e94a52017-11-01 10:08:30 +00001388 ompt_thread_info_t ti;
1389 // This is the case, if called from __kmpc_critical:
1390 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1391 if (!codeptr)
1392 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1393#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001394
Jonathan Peyton30419822017-05-12 18:01:32 +00001395 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001396
Jonathan Peyton30419822017-05-12 18:01:32 +00001397 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1398 // Check if it is initialized.
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001399 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
Jonathan Peyton30419822017-05-12 18:01:32 +00001400 if (*lk == 0) {
1401 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1402 if (KMP_IS_D_LOCK(lckseq)) {
1403 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1404 KMP_GET_D_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001405 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001406 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001407 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001408 }
1409 // Branch for accessing the actual lock object and set operation. This
1410 // branching is inevitable since this lock initialization does not follow the
1411 // normal dispatch path (lock table is not used).
1412 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1413 lck = (kmp_user_lock_p)lk;
1414 if (__kmp_env_consistency_check) {
1415 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1416 __kmp_map_hint_to_lock(hint));
1417 }
1418#if USE_ITT_BUILD
1419 __kmp_itt_critical_acquiring(lck);
1420#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001421#if OMPT_SUPPORT && OMPT_OPTIONAL
1422 if (ompt_enabled.enabled) {
1423 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1424 /* OMPT state update */
1425 prev_state = ti.state;
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001426 ti.wait_id = (ompt_wait_id_t)lck;
1427 ti.state = ompt_state_wait_critical;
Joachim Protze82e94a52017-11-01 10:08:30 +00001428
1429 /* OMPT event callback */
1430 if (ompt_enabled.ompt_callback_mutex_acquire) {
1431 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1432 ompt_mutex_critical, (unsigned int)hint,
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001433 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001434 }
1435 }
1436#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001437#if KMP_USE_INLINED_TAS
1438 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1439 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1440 } else
1441#elif KMP_USE_INLINED_FUTEX
1442 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1443 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1444 } else
1445#endif
1446 {
1447 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1448 }
1449 } else {
1450 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1451 lck = ilk->lock;
1452 if (__kmp_env_consistency_check) {
1453 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1454 __kmp_map_hint_to_lock(hint));
1455 }
1456#if USE_ITT_BUILD
1457 __kmp_itt_critical_acquiring(lck);
1458#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001459#if OMPT_SUPPORT && OMPT_OPTIONAL
1460 if (ompt_enabled.enabled) {
1461 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1462 /* OMPT state update */
1463 prev_state = ti.state;
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001464 ti.wait_id = (ompt_wait_id_t)lck;
1465 ti.state = ompt_state_wait_critical;
Joachim Protze82e94a52017-11-01 10:08:30 +00001466
1467 /* OMPT event callback */
1468 if (ompt_enabled.ompt_callback_mutex_acquire) {
1469 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1470 ompt_mutex_critical, (unsigned int)hint,
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001471 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001472 }
1473 }
1474#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001475 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1476 }
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001477 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001478
Jim Cownie5e8470a2013-09-27 10:38:44 +00001479#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001480 __kmp_itt_critical_acquired(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001481#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001482#if OMPT_SUPPORT && OMPT_OPTIONAL
1483 if (ompt_enabled.enabled) {
1484 /* OMPT state update */
1485 ti.state = prev_state;
1486 ti.wait_id = 0;
1487
1488 /* OMPT event callback */
1489 if (ompt_enabled.ompt_callback_mutex_acquired) {
1490 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001491 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001492 }
1493 }
1494#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001495
Jonathan Peyton30419822017-05-12 18:01:32 +00001496 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1497 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001498} // __kmpc_critical_with_hint
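// A sketch of a construct that lands here (OpenMP 4.5 hint clause on a named
// critical; illustrative lowering only, crit_name is hypothetical):
/*
  // #pragma omp critical (spec_update) hint(omp_lock_hint_speculative)
  __kmpc_critical_with_hint(&loc, global_tid, &crit_name,
                            omp_lock_hint_speculative);
  ... // protected region
  __kmpc_end_critical(&loc, global_tid, &crit_name);
*/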
1499
1500#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001501
1502/*!
1503@ingroup WORK_SHARING
1504@param loc source location information.
@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001506@param crit identity of the critical section. This could be a pointer to a lock
1507associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001508
1509Leave a critical section, releasing any lock that was held during its execution.
1510*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001511void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1512 kmp_critical_name *crit) {
1513 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001514
Jonathan Peyton30419822017-05-12 18:01:32 +00001515 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001516
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001517#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00001518 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1519 lck = (kmp_user_lock_p)crit;
1520 KMP_ASSERT(lck != NULL);
1521 if (__kmp_env_consistency_check) {
1522 __kmp_pop_sync(global_tid, ct_critical, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001523 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001524#if USE_ITT_BUILD
1525 __kmp_itt_critical_releasing(lck);
1526#endif
1527#if KMP_USE_INLINED_TAS
1528 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1529 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1530 } else
1531#elif KMP_USE_INLINED_FUTEX
1532 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1533 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1534 } else
1535#endif
1536 {
1537 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1538 }
1539 } else {
1540 kmp_indirect_lock_t *ilk =
1541 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1542 KMP_ASSERT(ilk != NULL);
1543 lck = ilk->lock;
1544 if (__kmp_env_consistency_check) {
1545 __kmp_pop_sync(global_tid, ct_critical, loc);
1546 }
1547#if USE_ITT_BUILD
1548 __kmp_itt_critical_releasing(lck);
1549#endif
1550 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1551 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001552
1553#else // KMP_USE_DYNAMIC_LOCK
1554
Jonathan Peyton30419822017-05-12 18:01:32 +00001555 if ((__kmp_user_lock_kind == lk_tas) &&
1556 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1557 lck = (kmp_user_lock_p)crit;
1558 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001559#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001560 else if ((__kmp_user_lock_kind == lk_futex) &&
1561 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1562 lck = (kmp_user_lock_p)crit;
1563 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001564#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001565 else { // ticket, queuing or drdpa
1566 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1567 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001568
Jonathan Peyton30419822017-05-12 18:01:32 +00001569 KMP_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001570
Jonathan Peyton30419822017-05-12 18:01:32 +00001571 if (__kmp_env_consistency_check)
1572 __kmp_pop_sync(global_tid, ct_critical, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001573
1574#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001575 __kmp_itt_critical_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001576#endif /* USE_ITT_BUILD */
  // The value of 'crit' is usable as the critical_id of the critical section
  // directive.
1579 __kmp_release_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001580
Joachim Protze82e94a52017-11-01 10:08:30 +00001581#endif // KMP_USE_DYNAMIC_LOCK
1582
1583#if OMPT_SUPPORT && OMPT_OPTIONAL
1584 /* OMPT release event triggers after lock is released; place here to trigger
1585 * for all #if branches */
1586 OMPT_STORE_RETURN_ADDRESS(global_tid);
1587 if (ompt_enabled.ompt_callback_mutex_released) {
1588 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)crit,
        OMPT_LOAD_RETURN_ADDRESS(global_tid));
Jonathan Peyton30419822017-05-12 18:01:32 +00001590 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001591#endif
1592
Jonathan Peyton30419822017-05-12 18:01:32 +00001593 KMP_POP_PARTITIONED_TIMER();
1594 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001595}
1596
1597/*!
1598@ingroup SYNCHRONIZATION
1599@param loc source location information
1600@param global_tid thread id.
1601@return one if the thread should execute the master block, zero otherwise
1602
Jonathan Peyton30419822017-05-12 18:01:32 +00001603Start execution of a combined barrier and master. The barrier is executed inside
1604this function.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001605*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001606kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1607 int status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001608
Jonathan Peyton30419822017-05-12 18:01:32 +00001609 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001610
Jonathan Peyton30419822017-05-12 18:01:32 +00001611 if (!TCR_4(__kmp_init_parallel))
1612 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001613
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00001614#if OMP_50_ENABLED
1615 __kmp_resume_if_soft_paused();
1616#endif
1617
Jonathan Peyton30419822017-05-12 18:01:32 +00001618 if (__kmp_env_consistency_check)
1619 __kmp_check_barrier(global_tid, ct_barrier, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001620
Joachim Protze82e94a52017-11-01 10:08:30 +00001621#if OMPT_SUPPORT
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001622 ompt_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001623 if (ompt_enabled.enabled) {
1624 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001625 if (ompt_frame->enter_frame.ptr == NULL)
1626 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001627 OMPT_STORE_RETURN_ADDRESS(global_tid);
1628 }
1629#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001630#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001631 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001632#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001633 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001634#if OMPT_SUPPORT && OMPT_OPTIONAL
1635 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001636 ompt_frame->enter_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001637 }
1638#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639
Jonathan Peyton30419822017-05-12 18:01:32 +00001640 return (status != 0) ? 0 : 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001641}
1642
1643/*!
1644@ingroup SYNCHRONIZATION
1645@param loc source location information
1646@param global_tid thread id.
1647
1648Complete the execution of a combined barrier and master. This function should
1649only be called at the completion of the <tt>master</tt> code. Other threads will
1650still be waiting at the barrier and this call releases them.
1651*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001652void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1653 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001654
Jonathan Peyton30419822017-05-12 18:01:32 +00001655 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001656}
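// A sketch of what a compiler might emit using this pair, and the nowait
// variant defined next (illustrative only):
/*
  if (__kmpc_barrier_master(&loc, global_tid)) {
    ... // master-only code; the other threads wait at the barrier
    __kmpc_end_barrier_master(&loc, global_tid); // releases the waiters
  }

  if (__kmpc_barrier_master_nowait(&loc, global_tid)) {
    ... // master-only code; the other threads have already moved on
  }
*/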
1657
1658/*!
1659@ingroup SYNCHRONIZATION
1660@param loc source location information
1661@param global_tid thread id.
1662@return one if the thread should execute the master block, zero otherwise
1663
1664Start execution of a combined barrier and master(nowait) construct.
1665The barrier is executed inside this function.
1666There is no equivalent "end" function, since the
1667*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001668kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1669 kmp_int32 ret;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670
Jonathan Peyton30419822017-05-12 18:01:32 +00001671 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001672
Jonathan Peyton30419822017-05-12 18:01:32 +00001673 if (!TCR_4(__kmp_init_parallel))
1674 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001675
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00001676#if OMP_50_ENABLED
1677 __kmp_resume_if_soft_paused();
1678#endif
1679
Jonathan Peyton30419822017-05-12 18:01:32 +00001680 if (__kmp_env_consistency_check) {
1681 if (loc == 0) {
1682 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
Jim Cownie5e8470a2013-09-27 10:38:44 +00001683 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001684 __kmp_check_barrier(global_tid, ct_barrier, loc);
1685 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001686
Joachim Protze82e94a52017-11-01 10:08:30 +00001687#if OMPT_SUPPORT
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001688 ompt_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001689 if (ompt_enabled.enabled) {
1690 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001691 if (ompt_frame->enter_frame.ptr == NULL)
1692 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001693 OMPT_STORE_RETURN_ADDRESS(global_tid);
1694 }
1695#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001696#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001697 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001698#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001699 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001700#if OMPT_SUPPORT && OMPT_OPTIONAL
1701 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001702 ompt_frame->enter_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001703 }
1704#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001705
Jonathan Peyton30419822017-05-12 18:01:32 +00001706 ret = __kmpc_master(loc, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707
Jonathan Peyton30419822017-05-12 18:01:32 +00001708 if (__kmp_env_consistency_check) {
1709 /* there's no __kmpc_end_master called; so the (stats) */
1710 /* actions of __kmpc_end_master are done here */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711
Jonathan Peyton30419822017-05-12 18:01:32 +00001712 if (global_tid < 0) {
1713 KMP_WARNING(ThreadIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001714 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001715 if (ret) {
1716 /* only one thread should do the pop since only */
1717 /* one did the push (see __kmpc_master()) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001718
Jonathan Peyton30419822017-05-12 18:01:32 +00001719 __kmp_pop_sync(global_tid, ct_master, loc);
1720 }
1721 }
1722
1723 return (ret);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001724}
1725
1726/* The BARRIER for a SINGLE process section is always explicit */
1727/*!
1728@ingroup WORK_SHARING
1729@param loc source location information
1730@param global_tid global thread number
1731@return One if this thread should execute the single construct, zero otherwise.
1732
1733Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if one is required.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001736*/
1737
Jonathan Peyton30419822017-05-12 18:01:32 +00001738kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1739 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
Jonathan Peyton30138252016-03-03 21:21:05 +00001740
Jonathan Peyton30419822017-05-12 18:01:32 +00001741 if (rc) {
1742 // We are going to execute the single statement, so we should count it.
1743 KMP_COUNT_BLOCK(OMP_SINGLE);
1744 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1745 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001746
Joachim Protze82e94a52017-11-01 10:08:30 +00001747#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001748 kmp_info_t *this_thr = __kmp_threads[global_tid];
1749 kmp_team_t *team = this_thr->th.th_team;
1750 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001751
Joachim Protze82e94a52017-11-01 10:08:30 +00001752 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001753 if (rc) {
Joachim Protze82e94a52017-11-01 10:08:30 +00001754 if (ompt_enabled.ompt_callback_work) {
1755 ompt_callbacks.ompt_callback(ompt_callback_work)(
1756 ompt_work_single_executor, ompt_scope_begin,
1757 &(team->t.ompt_team_info.parallel_data),
1758 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1759 1, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001760 }
1761 } else {
Joachim Protze82e94a52017-11-01 10:08:30 +00001762 if (ompt_enabled.ompt_callback_work) {
1763 ompt_callbacks.ompt_callback(ompt_callback_work)(
1764 ompt_work_single_other, ompt_scope_begin,
1765 &(team->t.ompt_team_info.parallel_data),
1766 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1767 1, OMPT_GET_RETURN_ADDRESS(0));
1768 ompt_callbacks.ompt_callback(ompt_callback_work)(
1769 ompt_work_single_other, ompt_scope_end,
1770 &(team->t.ompt_team_info.parallel_data),
1771 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1772 1, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001773 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001774 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001775 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001776#endif
1777
Jonathan Peyton30419822017-05-12 18:01:32 +00001778 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001779}
1780
1781/*!
1782@ingroup WORK_SHARING
1783@param loc source location information
1784@param global_tid global thread number
1785
1786Mark the end of a <tt>single</tt> construct. This function should
1787only be called by the thread that executed the block of code protected
1788by the `single` construct.
1789*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001790void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1791 __kmp_exit_single(global_tid);
1792 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001793
Joachim Protze82e94a52017-11-01 10:08:30 +00001794#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001795 kmp_info_t *this_thr = __kmp_threads[global_tid];
1796 kmp_team_t *team = this_thr->th.th_team;
1797 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001798
Joachim Protze82e94a52017-11-01 10:08:30 +00001799 if (ompt_enabled.ompt_callback_work) {
1800 ompt_callbacks.ompt_callback(ompt_callback_work)(
1801 ompt_work_single_executor, ompt_scope_end,
1802 &(team->t.ompt_team_info.parallel_data),
1803 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1804 OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001805 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001806#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001807}
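// A sketch of the usual lowering of `#pragma omp single` onto these entry
// points; per the comment above, the trailing barrier is emitted explicitly
// by the compiler, not by the runtime (illustrative only):
/*
  if (__kmpc_single(&loc, global_tid)) {
    ... // body, executed by exactly one thread of the team
    __kmpc_end_single(&loc, global_tid);
  }
  __kmpc_barrier(&loc, global_tid); // omitted when nowait is specified
*/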
1808
1809/*!
1810@ingroup WORK_SHARING
1811@param loc Source location
1812@param global_tid Global thread id
1813
1814Mark the end of a statically scheduled loop.
1815*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001816void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001817 KMP_POP_PARTITIONED_TIMER();
Jonathan Peyton30419822017-05-12 18:01:32 +00001818 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001819
Joachim Protze82e94a52017-11-01 10:08:30 +00001820#if OMPT_SUPPORT && OMPT_OPTIONAL
1821 if (ompt_enabled.ompt_callback_work) {
Joachim Protze489cdb72018-09-10 14:34:54 +00001822 ompt_work_t ompt_work_type = ompt_work_loop;
Jonathan Peyton30419822017-05-12 18:01:32 +00001823 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001824 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1825 // Determine workshare type
1826 if (loc != NULL) {
1827 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1828 ompt_work_type = ompt_work_loop;
1829 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1830 ompt_work_type = ompt_work_sections;
1831 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1832 ompt_work_type = ompt_work_distribute;
1833 } else {
Joachim Protze91732472017-11-10 21:07:01 +00001834 // use default set above.
1835 // a warning about this case is provided in __kmpc_for_static_init
Joachim Protze82e94a52017-11-01 10:08:30 +00001836 }
1837 KMP_DEBUG_ASSERT(ompt_work_type);
1838 }
1839 ompt_callbacks.ompt_callback(ompt_callback_work)(
1840 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1841 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001842 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001843#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001844 if (__kmp_env_consistency_check)
1845 __kmp_pop_workshare(global_tid, ct_pdo, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001846}
1847
// User routines which take C-style (call-by-value) arguments,
// unlike the equivalent Fortran routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00001850
Jonathan Peyton30419822017-05-12 18:01:32 +00001851void ompc_set_num_threads(int arg) {
1852 // !!!!! TODO: check the per-task binding
1853 __kmp_set_num_threads(arg, __kmp_entry_gtid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00001854}
1855
Jonathan Peyton30419822017-05-12 18:01:32 +00001856void ompc_set_dynamic(int flag) {
1857 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001858
Jonathan Peyton30419822017-05-12 18:01:32 +00001859 /* For the thread-private implementation of the internal controls */
1860 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001861
Jonathan Peyton30419822017-05-12 18:01:32 +00001862 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001863
Jonathan Peyton30419822017-05-12 18:01:32 +00001864 set__dynamic(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001865}
1866
Jonathan Peyton30419822017-05-12 18:01:32 +00001867void ompc_set_nested(int flag) {
1868 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001869
Jonathan Peyton30419822017-05-12 18:01:32 +00001870 /* For the thread-private internal controls implementation */
1871 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001872
Jonathan Peyton30419822017-05-12 18:01:32 +00001873 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001874
Jonathan Peyton76b45e82019-02-28 20:47:21 +00001875 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001876}
1877
Jonathan Peyton30419822017-05-12 18:01:32 +00001878void ompc_set_max_active_levels(int max_active_levels) {
1879 /* TO DO */
1880 /* we want per-task implementation of this internal control */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001881
Jonathan Peyton30419822017-05-12 18:01:32 +00001882 /* For the per-thread internal controls implementation */
1883 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001884}
1885
Jonathan Peyton30419822017-05-12 18:01:32 +00001886void ompc_set_schedule(omp_sched_t kind, int modifier) {
1887 // !!!!! TODO: check the per-task binding
1888 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001889}
1890
Jonathan Peyton30419822017-05-12 18:01:32 +00001891int ompc_get_ancestor_thread_num(int level) {
1892 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001893}
1894
Jonathan Peyton30419822017-05-12 18:01:32 +00001895int ompc_get_team_size(int level) {
1896 return __kmp_get_team_size(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001897}
1898
Jonathan Peyton6d88e042018-12-13 23:14:24 +00001899#if OMP_50_ENABLED
1900/* OpenMP 5.0 Affinity Format API */
1901
1902void ompc_set_affinity_format(char const *format) {
1903 if (!__kmp_init_serial) {
1904 __kmp_serial_initialize();
1905 }
1906 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1907 format, KMP_STRLEN(format) + 1);
1908}
1909
1910size_t ompc_get_affinity_format(char *buffer, size_t size) {
1911 size_t format_size;
1912 if (!__kmp_init_serial) {
1913 __kmp_serial_initialize();
1914 }
1915 format_size = KMP_STRLEN(__kmp_affinity_format);
1916 if (buffer && size) {
1917 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1918 format_size + 1);
1919 }
1920 return format_size;
1921}
1922
1923void ompc_display_affinity(char const *format) {
1924 int gtid;
1925 if (!TCR_4(__kmp_init_middle)) {
1926 __kmp_middle_initialize();
1927 }
1928 gtid = __kmp_get_gtid();
1929 __kmp_aux_display_affinity(gtid, format);
1930}
1931
1932size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1933 char const *format) {
1934 int gtid;
1935 size_t num_required;
1936 kmp_str_buf_t capture_buf;
1937 if (!TCR_4(__kmp_init_middle)) {
1938 __kmp_middle_initialize();
1939 }
1940 gtid = __kmp_get_gtid();
1941 __kmp_str_buf_init(&capture_buf);
1942 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1943 if (buffer && buf_size) {
1944 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1945 capture_buf.used + 1);
1946 }
1947 __kmp_str_buf_free(&capture_buf);
1948 return num_required;
1949}
1950#endif /* OMP_50_ENABLED */
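// A sketch of how the user-visible OpenMP 5.0 API is expected to map onto
// the ompc_ entry points above (illustrative only; the format string is an
// example built from the spec's short field names):
/*
  char buf[512];
  omp_set_affinity_format("host=%H pid=%P tid=%n aff=%A");
  size_t n = omp_capture_affinity(buf, sizeof(buf), NULL);
  if (n >= sizeof(buf)) {
    // buf was too small; n is the size the full string would need
  }
*/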
1951
Jonathan Peyton30419822017-05-12 18:01:32 +00001952void kmpc_set_stacksize(int arg) {
1953 // __kmp_aux_set_stacksize initializes the library if needed
1954 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955}
1956
Jonathan Peyton30419822017-05-12 18:01:32 +00001957void kmpc_set_stacksize_s(size_t arg) {
1958 // __kmp_aux_set_stacksize initializes the library if needed
1959 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001960}
1961
Jonathan Peyton30419822017-05-12 18:01:32 +00001962void kmpc_set_blocktime(int arg) {
1963 int gtid, tid;
1964 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965
Jonathan Peyton30419822017-05-12 18:01:32 +00001966 gtid = __kmp_entry_gtid();
1967 tid = __kmp_tid_from_gtid(gtid);
1968 thread = __kmp_thread_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969
Jonathan Peyton30419822017-05-12 18:01:32 +00001970 __kmp_aux_set_blocktime(arg, thread, tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971}
1972
Jonathan Peyton30419822017-05-12 18:01:32 +00001973void kmpc_set_library(int arg) {
1974 // __kmp_user_set_library initializes the library if needed
1975 __kmp_user_set_library((enum library_type)arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001976}
1977
Jonathan Peyton30419822017-05-12 18:01:32 +00001978void kmpc_set_defaults(char const *str) {
1979 // __kmp_aux_set_defaults initializes the library if needed
1980 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001981}
1982
Jonathan Peyton30419822017-05-12 18:01:32 +00001983void kmpc_set_disp_num_buffers(int arg) {
1984 // ignore after initialization because some teams have already
1985 // allocated dispatch buffers
1986 if (__kmp_init_serial == 0 && arg > 0)
1987 __kmp_dispatch_num_buffers = arg;
Jonathan Peyton067325f2016-05-31 19:01:15 +00001988}
1989
Jonathan Peyton30419822017-05-12 18:01:32 +00001990int kmpc_set_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001991#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001992 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001993#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001994 if (!TCR_4(__kmp_init_middle)) {
1995 __kmp_middle_initialize();
1996 }
1997 return __kmp_aux_set_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001998#endif
1999}
2000
Jonathan Peyton30419822017-05-12 18:01:32 +00002001int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00002002#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00002003 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002004#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002005 if (!TCR_4(__kmp_init_middle)) {
2006 __kmp_middle_initialize();
2007 }
2008 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002009#endif
2010}
2011
Jonathan Peyton30419822017-05-12 18:01:32 +00002012int kmpc_get_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00002013#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00002014 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002016 if (!TCR_4(__kmp_init_middle)) {
2017 __kmp_middle_initialize();
2018 }
2019 return __kmp_aux_get_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002020#endif
2021}
2022
Jim Cownie5e8470a2013-09-27 10:38:44 +00002023/* -------------------------------------------------------------------------- */
2024/*!
2025@ingroup THREADPRIVATE
2026@param loc source location information
2027@param gtid global thread number
2028@param cpy_size size of the cpy_data buffer
2029@param cpy_data pointer to data to be copied
2030@param cpy_func helper function to call for copying data
2031@param didit flag variable: 1=single thread; 0=not single thread
2032
Jonathan Peyton30419822017-05-12 18:01:32 +00002033__kmpc_copyprivate implements the interface for the private data broadcast
2034needed for the copyprivate clause associated with a single region in an
2035OpenMP<sup>*</sup> program (both C and Fortran).
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036All threads participating in the parallel region call this routine.
Jonathan Peyton30419822017-05-12 18:01:32 +00002037One of the threads (called the single thread) should have the <tt>didit</tt>
2038variable set to 1 and all other threads should have that variable set to 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002039All threads pass a pointer to a data buffer (cpy_data) that they have built.
2040
Jonathan Peyton30419822017-05-12 18:01:32 +00002041The OpenMP specification forbids the use of nowait on the single region when a
2042copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2043barrier internally to avoid race conditions, so the code generation for the
2044single region should avoid generating a barrier after the call to @ref
2045__kmpc_copyprivate.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046
2047The <tt>gtid</tt> parameter is the global thread id for the current thread.
2048The <tt>loc</tt> parameter is a pointer to source location information.
2049
Jonathan Peyton30419822017-05-12 18:01:32 +00002050Internal implementation: The single thread will first copy its descriptor
2051address (cpy_data) to a team-private location, then the other threads will each
2052call the function pointed to by the parameter cpy_func, which carries out the
2053copy by copying the data using the cpy_data buffer.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002054
Jonathan Peyton30419822017-05-12 18:01:32 +00002055The cpy_func routine used for the copy and the contents of the data area defined
2056by cpy_data and cpy_size may be built in any fashion that will allow the copy
2057to be done. For instance, the cpy_data buffer can hold the actual data to be
2058copied or it may hold a list of pointers to the data. The cpy_func routine must
2059interpret the cpy_data buffer appropriately.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060
2061The interface to cpy_func is as follows:
2062@code
2063void cpy_func( void *destination, void *source )
2064@endcode
2065where void *destination is the cpy_data pointer for the thread being copied to
2066and void *source is the cpy_data pointer for the thread being copied from.
2067*/
Jonathan Peyton30419822017-05-12 18:01:32 +00002068void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2069 void *cpy_data, void (*cpy_func)(void *, void *),
2070 kmp_int32 didit) {
2071 void **data_ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002072
Jonathan Peyton30419822017-05-12 18:01:32 +00002073 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002074
Jonathan Peyton30419822017-05-12 18:01:32 +00002075 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002076
Jonathan Peyton30419822017-05-12 18:01:32 +00002077 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078
Jonathan Peyton30419822017-05-12 18:01:32 +00002079 if (__kmp_env_consistency_check) {
2080 if (loc == 0) {
2081 KMP_WARNING(ConstructIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002082 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002083 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002084
Jonathan Peyton30419822017-05-12 18:01:32 +00002085 // ToDo: Optimize the following two barriers into some kind of split barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002086
Jonathan Peyton30419822017-05-12 18:01:32 +00002087 if (didit)
2088 *data_ptr = cpy_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002089
Joachim Protze82e94a52017-11-01 10:08:30 +00002090#if OMPT_SUPPORT
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002091 ompt_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00002092 if (ompt_enabled.enabled) {
2093 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002094 if (ompt_frame->enter_frame.ptr == NULL)
2095 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00002096 OMPT_STORE_RETURN_ADDRESS(gtid);
2097 }
2098#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002099/* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002100#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002101 __kmp_threads[gtid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002102#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002103 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002104
Jonathan Peyton30419822017-05-12 18:01:32 +00002105 if (!didit)
2106 (*cpy_func)(cpy_data, *data_ptr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002107
Jonathan Peyton30419822017-05-12 18:01:32 +00002108// Consider next barrier a user-visible barrier for barrier region boundaries
2109// Nesting checks are already handled by the single construct checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002110
Joachim Protze82e94a52017-11-01 10:08:30 +00002111#if OMPT_SUPPORT
2112 if (ompt_enabled.enabled) {
2113 OMPT_STORE_RETURN_ADDRESS(gtid);
2114 }
2115#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002116#if USE_ITT_NOTIFY
  // TODO: check if this is needed (e.g. tasks can overwrite the location)
  __kmp_threads[gtid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002119#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002120 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00002121#if OMPT_SUPPORT && OMPT_OPTIONAL
2122 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002123 ompt_frame->enter_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00002124 }
2125#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002126}
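// A sketch of a cpy_func and of the lowering of
// `#pragma omp single copyprivate(x)` described above (illustrative only;
// copy_x and compute() are hypothetical names):
/*
  static void copy_x(void *dst, void *src) {
    *(double *)dst = *(double *)src; // dst/src are each thread's cpy_data (&x)
  }

  double x; // private copy in each thread
  kmp_int32 didit = __kmpc_single(&loc, gtid);
  if (didit) {
    x = compute(); // executed by the single thread only
    __kmpc_end_single(&loc, gtid);
  }
  // Every thread participates; the runtime broadcasts via copy_x and
  // synchronizes internally, so no extra barrier is generated afterwards.
  __kmpc_copyprivate(&loc, gtid, sizeof(double), &x, copy_x, didit);
*/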
2127
2128/* -------------------------------------------------------------------------- */
2129
Jonathan Peyton30419822017-05-12 18:01:32 +00002130#define INIT_LOCK __kmp_init_user_lock_with_checks
2131#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2132#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2133#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2134#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2135#define ACQUIRE_NESTED_LOCK_TIMED \
2136 __kmp_acquire_nested_user_lock_with_checks_timed
2137#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2138#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2139#define TEST_LOCK __kmp_test_user_lock_with_checks
2140#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2141#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2142#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002143
Jonathan Peyton30419822017-05-12 18:01:32 +00002144// TODO: Make check abort messages use location info & pass it into
2145// with_checks routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00002146
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002147#if KMP_USE_DYNAMIC_LOCK
2148
2149// internal lock initializer
Jonathan Peyton30419822017-05-12 18:01:32 +00002150static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2151 kmp_dyna_lockseq_t seq) {
2152 if (KMP_IS_D_LOCK(seq)) {
2153 KMP_INIT_D_LOCK(lock, seq);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002154#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002155 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002156#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002157 } else {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002158 KMP_INIT_I_LOCK(lock, seq);
2159#if USE_ITT_BUILD
2160 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2161 __kmp_itt_lock_creating(ilk->lock, loc);
2162#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002163 }
2164}
2165
2166// internal nest lock initializer
2167static __forceinline void
2168__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2169 kmp_dyna_lockseq_t seq) {
2170#if KMP_USE_TSX
2171 // Don't have nested lock implementation for speculative locks
2172 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2173 seq = __kmp_user_lock_seq;
2174#endif
2175 switch (seq) {
2176 case lockseq_tas:
2177 seq = lockseq_nested_tas;
2178 break;
2179#if KMP_USE_FUTEX
2180 case lockseq_futex:
2181 seq = lockseq_nested_futex;
2182 break;
2183#endif
2184 case lockseq_ticket:
2185 seq = lockseq_nested_ticket;
2186 break;
2187 case lockseq_queuing:
2188 seq = lockseq_nested_queuing;
2189 break;
2190 case lockseq_drdpa:
2191 seq = lockseq_nested_drdpa;
2192 break;
2193 default:
2194 seq = lockseq_nested_queuing;
2195 }
2196 KMP_INIT_I_LOCK(lock, seq);
2197#if USE_ITT_BUILD
2198 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2199 __kmp_itt_lock_creating(ilk->lock, loc);
2200#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002201}
2202
2203/* initialize the lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00002204void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2205 uintptr_t hint) {
2206 KMP_DEBUG_ASSERT(__kmp_init_serial);
2207 if (__kmp_env_consistency_check && user_lock == NULL) {
2208 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2209 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002210
Jonathan Peyton30419822017-05-12 18:01:32 +00002211 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Joachim Protze82e94a52017-11-01 10:08:30 +00002212
2213#if OMPT_SUPPORT && OMPT_OPTIONAL
2214 // This is the case, if called from omp_init_lock_with_hint:
2215 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2216 if (!codeptr)
2217 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2218 if (ompt_enabled.ompt_callback_lock_init) {
2219 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2220 ompt_mutex_lock, (omp_lock_hint_t)hint,
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002221 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002222 codeptr);
2223 }
2224#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002225}

/* initialize the nested lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
}
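
/* Usage sketch (illustrative): the nested variant follows the same path for
     omp_nest_lock_t nl;
     omp_init_nest_lock_with_hint(&nl, omp_lock_hint_contended);
   except that the chosen sequence is remapped to its lockseq_nested_* twin by
   __kmp_init_nest_lock_with_hint() above. */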
2250
2251#endif // KMP_USE_DYNAMIC_LOCK
2252
/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
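
/* Lifecycle sketch (illustrative): a user of this API goes through
     omp_lock_t l;
     omp_init_lock(&l); // -> __kmpc_init_lock
     omp_set_lock(&l); // -> __kmpc_set_lock
     omp_unset_lock(&l); // -> __kmpc_unset_lock
     omp_destroy_lock(&l); // -> __kmpc_destroy_lock
   with the same lock word handed to each entry point as 'user_lock'. */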

/* initialize the nested lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the nested lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
          codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void) acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
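
/* Nesting sketch (illustrative): the owning thread may re-acquire without
   blocking, and the OMPT events distinguish the two cases:
     omp_set_nest_lock(&nl); // depth 1: mutex_acquired ("lock_first")
     omp_set_nest_lock(&nl); // depth 2: nest_lock scope_begin ("lock_next")
     omp_unset_nest_lock(&nl); // depth 1: nest_lock scope_end
     omp_unset_nest_lock(&nl); // depth 0: mutex_released */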

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      // Report user_lock: lck is not yet assigned on this fast path.
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* release the lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void) release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last; report user_lock, since lck is not yet
          // assigned on this fast path.
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
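
/* Usage sketch (illustrative): unlike omp_set_lock, the test entry never
   blocks, so callers can poll or do other work in the meantime:
     if (omp_test_lock(&l)) { // -> __kmpc_test_lock, FTN_TRUE on success
       ... critical work ...
       omp_unset_lock(&l);
     } else {
       ... do something else and retry later ...
     } */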

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
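
/* Return-value sketch (illustrative): on success the nested test returns the
   new nesting depth rather than just 1, which is how rc distinguishes first
   acquisition (rc == 1, reported as mutex_acquired) from re-acquisition
   (rc > 1, reported as nest_lock scope_begin):
     int depth = omp_test_nest_lock(&nl); // 0 if the lock is held elsewhere */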

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
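
/* Pairing sketch (illustrative): when the chosen method is
   critical_reduce_block, __kmpc_reduce_nowait() calls
   __kmp_enter_critical_section_reduce_block() and returns 1; the
   compiler-emitted code then combines the private copies (e.g. via
   reduce_func), and the matching __kmpc_end_reduce_nowait() leaves the
   section through __kmp_end_critical_section_reduce_block(). */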

#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside a teams construct
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
#endif
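
/* Pairing sketch (illustrative): the reduce entry points bracket their work
   with these helpers, e.g.
     kmp_team_t *team;
     int task_state;
     int swapped =
         __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
     ... run the reduction against the parent team ...
     if (swapped)
       __kmp_restore_swapped_teams(th, team, task_state);
   so a reduction at the teams construct is performed on the parent team. */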
3356
Jim Cownie5e8470a2013-09-27 10:38:44 +00003357/* 2.a.i. Reduce Block without a terminating barrier */
3358/*!
3359@ingroup SYNCHRONIZATION
3360@param loc source location information
3361@param global_tid global thread number
3362@param num_vars number of items (variables) to be reduced
3363@param reduce_size size of data in bytes to be reduced
3364@param reduce_data pointer to data to be reduced
Jonathan Peyton30419822017-05-12 18:01:32 +00003365@param reduce_func callback function providing reduction operation on two
3366operands and returning result of reduction in lhs_data
Jim Cownie5e8470a2013-09-27 10:38:44 +00003367@param lck pointer to the unique lock data structure
Jonathan Peyton30419822017-05-12 18:01:32 +00003368@result 1 for the master thread, 0 for all other team threads, 2 for all team
3369threads if atomic reduction needed
Jim Cownie5e8470a2013-09-27 10:38:44 +00003370
3371The nowait version is used for a reduce clause with the nowait argument.
3372*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // The packed_reduction_method value will be reused by the __kmp_end_reduce*
  // functions, so it should be kept in a variable. The variable should be
  // either a construct-specific or a thread-specific property, not a
  // team-specific property: a thread can reach the next reduce block on the
  // next construct, and the reduce method may differ on the next construct.
  // An ident_t "loc" parameter could be used as a construct-specific property
  // (but what if loc == 0?). If both construct-specific and team-specific
  // variables were shared, unnecessary extra syncs would be needed.
  // A thread-specific variable is better regarding the two issues above (next
  // construct and extra syncs), so a thread-specific
  // "th_local.reduction_method" variable is used currently. Each thread
  // executes the 'determine' and 'set' lines (no need to execute them by a
  // single thread, which would require unnecessary extra syncs).

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop', but the atomic operation has not been executed yet; it will
    // be executed slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if the master goes slow, other threads are blocked here waiting for
// the master to come and release them)
// AT: (it's not what a customer might expect when specifying the NOWAIT
// clause)
// AT: (specifying NOWAIT won't result in improvement of performance, and
// it'll be confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other
// design) might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: run an EPCC test and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's
// used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary frame/return
    // addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
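
// A minimal sketch of the call pattern a compiler typically emits for
// "#pragma omp for reduction(+ : sum) nowait" (illustrative only; names such
// as my_loc, my_add, sum_priv and the atomic entry point are assumptions, and
// real code generation differs in detail):
//
//   int ret = __kmpc_reduce_nowait(&my_loc, gtid, 1, sizeof(kmp_int32),
//                                  &sum_priv, my_add, &crit_name);
//   switch (ret) {
//   case 1: // this thread combines into the shared result
//     sum += sum_priv;
//     __kmpc_end_reduce_nowait(&my_loc, gtid, &crit_name);
//     break;
//   case 2: // combine with per-variable atomic operations instead
//     __kmpc_atomic_fixed4_add(&my_loc, gtid, &sum, sum_priv); // assumed entry
//     break;
//   default: // 0: contribution already folded in by the tree barrier
//     break;
//   }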

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither the master nor the other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it would be better to remove this else-if entirely; after
    // removal the value would be caught by the 'else' below and would assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction is needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the master should do this pop here
    // (none of the other workers except the master will enter
    // __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
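
// The blocking variant follows the same pattern as the sketch after
// __kmpc_reduce_nowait() above, except that __kmpc_end_reduce() must be
// reached so the terminating barrier can release the team. A sketch under the
// same assumptions (my_loc, my_add, sum_priv, and the atomic entry point are
// illustrative):
//
//   int ret = __kmpc_reduce(&my_loc, gtid, 1, sizeof(kmp_int32), &sum_priv,
//                           my_add, &crit_name);
//   if (ret == 1) {
//     sum += sum_priv;
//     __kmpc_end_reduce(&my_loc, gtid, &crit_name);
//   } else if (ret == 2) {
//     __kmpc_atomic_fixed4_add(&my_loc, gtid, &sum, sum_priv); // assumed entry
//     __kmpc_end_reduce(&my_loc, gtid, &crit_name);
//   }
//   // ret == 0: tree-reduce workers are released by the master in
//   // __kmpc_end_reduce() and do not call it themselves.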

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size == 1, no synchronization is required (Intel platforms
// only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the master executes here (the master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid

#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loop bounds.

Initialize doacross loop information.
The compiler is expected to send us inclusive bounds,
e.g. for (i = 2; i < 9; i += 2) lo = 2, up = 8, st = 2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also the address of num_done in order to access it later without
  // knowing the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count.
  // Start with the range of dims[0], which we don't need to keep in the
  // buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if the shared buffer is not occupied by another loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
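
// A minimal sketch of how a compiler might lower a doacross loop such as
// "#pragma omp for ordered(2)" with "depend(sink: i-1,j)" / "depend(source)"
// onto these entry points (illustrative only; my_loc and the bounds n, m are
// assumptions):
//
//   struct kmp_dim dims[2] = {{0, n - 1, 1}, {0, m - 1, 1}}; // lo, up, st
//   __kmpc_doacross_init(&my_loc, gtid, 2, dims);
//   for (each (i, j) assigned to this thread) {
//     kmp_int64 sink_vec[2] = {i - 1, j};
//     __kmpc_doacross_wait(&my_loc, gtid, sink_vec); // depend(sink: i-1,j)
//     ... ordered loop body ...
//     kmp_int64 src_vec[2] = {i, j};
//     __kmpc_doacross_post(&my_loc, gtid, src_vec); // depend(source)
//   }
//   __kmpc_doacross_fini(&my_loc, gtid);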

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
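
// Example of the flag addressing above: linearized iteration 70 gives
// shft = 70 % 32 = 6 and word index 70 >> 5 = 2, i.e. the waiter spins on
// bit 6 of th_doacross_flags[2] until the corresponding post sets it.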

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
#endif

#if OMP_50_ENABLED
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
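
// A sketch of a direct call, assuming the OpenMP 5.0 user-level pause API is
// forwarded here by the usual entry wrappers (the kmp_soft_paused value comes
// from kmp_pause_status_t; the wrapper routing is an assumption):
//
//   if (__kmpc_pause_resource(kmp_soft_paused) == 1)
//     ; // runtime not initialized or pause rejected; nothing was paused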
#endif // OMP_50_ENABLED

// end of file //