/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
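 *
 * A minimal illustrative sketch (the NULL source-location argument and the
 * shell setting are assumptions made for this example, not requirements of
 * the interface):
 * @code
 * // Run with KMP_IGNORE_MPPEND=0 so that this call unregisters the root:
 * __kmpc_end(NULL);
 * @endcode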
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister
  // this root (it can cause library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
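
A minimal illustrative sketch of a direct call (the NULL source-location
argument is an assumption made for brevity here, not what a compiler emits):
@code
kmp_int32 gtid = __kmpc_global_thread_num(NULL);
@endcode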
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
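
As an illustrative sketch only (loc, gtid and outlined_fn are assumptions
standing in for compiler-generated entities), a construct such as
@code
#pragma omp parallel num_threads(n)
@endcode
is typically lowered to a call of this function immediately before the fork:
@code
__kmpc_push_num_threads(&loc, gtid, n);
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
@endcode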
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
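
For example (a minimal illustrative sketch; the outlined routine and the
ident_t literal are normally generated by the compiler, and the names used
here are assumptions):
@code
void outlined_fn(kmp_int32 *gtid, kmp_int32 *btid, int *a) {
  // body of the parallel region, executed by each thread of the team;
  // *gtid and *btid are the global and bound thread ids, and a is the
  // shared variable passed through the ellipsis below
}
// A region such as "#pragma omp parallel shared(a)" then becomes roughly:
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &a);
@endcode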
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe saving thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
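
A minimal illustrative sketch (loc, gtid and outlined_teams_fn stand in for
compiler-generated values): for a construct such as
@code
#pragma omp teams num_teams(nt) thread_limit(tl)
@endcode
the compiler typically emits
@code
__kmpc_push_num_teams(&loc, gtid, nt, tl);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_fn);
@endcode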
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
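
A minimal illustrative sketch (loc stands in for the compiler-generated
source-location literal):
@code
// #pragma omp flush is typically lowered to:
__kmpc_flush(&loc);
@endcode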
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
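
A minimal illustrative sketch (loc stands in for the compiler-generated
source-location literal):
@code
// #pragma omp barrier is typically lowered to:
__kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
@endcode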
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
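
A typical compiler lowering brackets the block with this call and
__kmpc_end_master (illustrative sketch; loc, gtid and body are assumptions):
@code
if (__kmpc_master(&loc, gtid)) {
  body(); // the master block
  __kmpc_end_master(&loc, gtid);
}
@endcode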
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
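
A typical lowering of an ordered block inside a loop body brackets it with
this call and __kmpc_end_ordered (illustrative sketch; loc, gtid and body are
assumptions):
@code
__kmpc_ordered(&loc, gtid);
body(); // the ordered block
__kmpc_end_ordered(&loc, gtid);
@endcode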
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
1138// __kmp_itt_critical_creating() should be called *before* the first usage
1139// of the underlying lock. It is the only place where we can guarantee it.
1140// There is a chance the lock will be destroyed without ever being used, but
1141// that is not a problem: it is not a real event seen by the user, merely the
1142// setting of a name for the object (the lock). See more details in kmp_itt.h.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001143#endif /* USE_ITT_BUILD */
1144
Jonathan Peyton30419822017-05-12 18:01:32 +00001145 // Use a cmpxchg instruction to slam the start of the critical section with
1146 // the lock pointer. If another thread beat us to it, deallocate the lock,
1147 // and use the lock that the other thread allocated.
1148 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001149
Jonathan Peyton30419822017-05-12 18:01:32 +00001150 if (status == 0) {
1151// Deallocate the lock and reload the value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001152#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001153 __kmp_itt_critical_destroyed(lck);
1154// Let ITT know the lock is destroyed and the same memory location may be reused
1155// for another purpose.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001156#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001157 __kmp_destroy_user_lock_with_checks(lck);
1158 __kmp_user_lock_free(&idx, gtid, lck);
1159 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1160 KMP_DEBUG_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001161 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001162 }
1163 return lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001164}
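// __kmp_get_critical_section_ptr publishes the lazily allocated lock with a
// single compare-and-store and discards the loser's copy. A generic sketch of
// that publish-or-discard pattern, assuming hypothetical make()/destroy()
// callbacks rather than the runtime's lock allocator:
#include <atomic>

template <class T>
T *example_lazy_publish(std::atomic<T *> &slot, T *(*make)(),
                        void (*destroy)(T *)) {
  T *obj = slot.load(std::memory_order_acquire);
  if (obj == nullptr) {
    T *fresh = make();
    T *expected = nullptr;
    if (slot.compare_exchange_strong(expected, fresh,
                                     std::memory_order_release,
                                     std::memory_order_acquire)) {
      obj = fresh; // we won the race; our object is now published
    } else {
      destroy(fresh); // another thread won; discard ours
      obj = expected; // the CAS left the winner's pointer in 'expected'
    }
  }
  return obj;
}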
1165
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001166#endif // KMP_USE_DYNAMIC_LOCK
1167
Jim Cownie5e8470a2013-09-27 10:38:44 +00001168/*!
1169@ingroup WORK_SHARING
1170@param loc source location information.
1171@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001172@param crit identity of the critical section. This could be a pointer to a lock
1173associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001174
1175Enter code protected by a `critical` construct.
1176This function blocks until the executing thread can enter the critical section.
1177*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001178void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1179 kmp_critical_name *crit) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001180#if KMP_USE_DYNAMIC_LOCK
Joachim Protze82e94a52017-11-01 10:08:30 +00001181#if OMPT_SUPPORT && OMPT_OPTIONAL
1182 OMPT_STORE_RETURN_ADDRESS(global_tid);
1183#endif // OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001184 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001185#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001186 KMP_COUNT_BLOCK(OMP_CRITICAL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001187#if OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001188 ompt_state_t prev_state = ompt_state_undefined;
Joachim Protze82e94a52017-11-01 10:08:30 +00001189 ompt_thread_info_t ti;
1190#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001191 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001192
Jonathan Peyton30419822017-05-12 18:01:32 +00001193 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001194
Jonathan Peyton30419822017-05-12 18:01:32 +00001195 // TODO: add THR_OVHD_STATE
Jim Cownie5e8470a2013-09-27 10:38:44 +00001196
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001197 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
Jonathan Peyton30419822017-05-12 18:01:32 +00001198 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001199
Jonathan Peyton30419822017-05-12 18:01:32 +00001200 if ((__kmp_user_lock_kind == lk_tas) &&
1201 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1202 lck = (kmp_user_lock_p)crit;
1203 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001204#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001205 else if ((__kmp_user_lock_kind == lk_futex) &&
1206 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1207 lck = (kmp_user_lock_p)crit;
1208 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001209#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001210 else { // ticket, queuing or drdpa
1211 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1212 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001213
Jonathan Peyton30419822017-05-12 18:01:32 +00001214 if (__kmp_env_consistency_check)
1215 __kmp_push_sync(global_tid, ct_critical, loc, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001216
Jonathan Peyton30419822017-05-12 18:01:32 +00001217// Since the critical directive binds to all threads, not just the current
1218// team, we have to check this even if we are in a serialized team.
1219// Also, even if we are the uber thread, we still have to acquire the lock,
1220// as we may have to contend with sibling threads.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001221
1222#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001223 __kmp_itt_critical_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001224#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001225#if OMPT_SUPPORT && OMPT_OPTIONAL
1226  OMPT_STORE_RETURN_ADDRESS(global_tid);
1227 void *codeptr_ra = NULL;
1228 if (ompt_enabled.enabled) {
1229 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1230 /* OMPT state update */
1231 prev_state = ti.state;
Joachim Protze4109d562019-05-20 14:21:42 +00001232 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001233 ti.state = ompt_state_wait_critical;
Joachim Protze82e94a52017-11-01 10:08:30 +00001234
1235 /* OMPT event callback */
1236    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1237 if (ompt_enabled.ompt_callback_mutex_acquire) {
1238 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1239 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze4109d562019-05-20 14:21:42 +00001240 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001241 }
1242 }
1243#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001244 // Value of 'crit' should be good for using as a critical_id of the critical
1245 // section directive.
1246 __kmp_acquire_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001247
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001248#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001249 __kmp_itt_critical_acquired(lck);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001250#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001251#if OMPT_SUPPORT && OMPT_OPTIONAL
1252 if (ompt_enabled.enabled) {
1253 /* OMPT state update */
1254 ti.state = prev_state;
1255 ti.wait_id = 0;
1256
1257 /* OMPT event callback */
1258 if (ompt_enabled.ompt_callback_mutex_acquired) {
1259 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze4109d562019-05-20 14:21:42 +00001260 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001261 }
1262 }
1263#endif
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001264 KMP_POP_PARTITIONED_TIMER();
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001265
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001266 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
Jonathan Peyton30419822017-05-12 18:01:32 +00001267 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001268#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001269}
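// A hedged sketch of how a compiler might lower '#pragma omp critical' onto
// this entry point, assuming the declarations from kmp.h; the zero-initialized
// static tag and the names below are illustrative, not emitted code.
static kmp_critical_name example_crit_tag; // one static tag per construct

void example_critical_region(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &example_crit_tag); // blocks until entry
  // ... user code protected by the critical construct ...
  __kmpc_end_critical(loc, gtid, &example_crit_tag);
}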
1270
1271#if KMP_USE_DYNAMIC_LOCK
1272
1273// Converts the given hint to an internal lock implementation
Jonathan Peyton30419822017-05-12 18:01:32 +00001274static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001275#if KMP_USE_TSX
Jonathan Peyton30419822017-05-12 18:01:32 +00001276#define KMP_TSX_LOCK(seq) lockseq_##seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001277#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001278#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001279#endif
Hal Finkel01bb2402016-03-27 13:24:09 +00001280
1281#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00001282#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
Hal Finkel01bb2402016-03-27 13:24:09 +00001283#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001284#define KMP_CPUINFO_RTM 0
Hal Finkel01bb2402016-03-27 13:24:09 +00001285#endif
1286
Jonathan Peyton30419822017-05-12 18:01:32 +00001287 // Hints that do not require further logic
1288 if (hint & kmp_lock_hint_hle)
1289 return KMP_TSX_LOCK(hle);
1290 if (hint & kmp_lock_hint_rtm)
1291 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1292 if (hint & kmp_lock_hint_adaptive)
1293 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001294
Jonathan Peyton30419822017-05-12 18:01:32 +00001295 // Rule out conflicting hints first by returning the default lock
1296 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001297 return __kmp_user_lock_seq;
Jonathan Peyton30419822017-05-12 18:01:32 +00001298 if ((hint & omp_lock_hint_speculative) &&
1299 (hint & omp_lock_hint_nonspeculative))
1300 return __kmp_user_lock_seq;
1301
1302 // Do not even consider speculation when it appears to be contended
1303 if (hint & omp_lock_hint_contended)
1304 return lockseq_queuing;
1305
1306 // Uncontended lock without speculation
1307 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1308 return lockseq_tas;
1309
1310 // HLE lock for speculation
1311 if (hint & omp_lock_hint_speculative)
1312 return KMP_TSX_LOCK(hle);
1313
1314 return __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001315}
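// Worked examples of the resolution order above (assuming TSX is compiled in
// and the CPU reports RTM; illustrative, not an exhaustive table):
//   omp_lock_hint_contended | omp_lock_hint_uncontended -> __kmp_user_lock_seq
//   omp_lock_hint_contended | omp_lock_hint_speculative -> lockseq_queuing
//   omp_lock_hint_uncontended                           -> lockseq_tas
//   omp_lock_hint_speculative                           -> KMP_TSX_LOCK(hle)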
1316
Joachim Protze82e94a52017-11-01 10:08:30 +00001317#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001318#if KMP_USE_DYNAMIC_LOCK
Joachim Protze1b2bd262018-01-17 10:06:01 +00001319static kmp_mutex_impl_t
Joachim Protze82e94a52017-11-01 10:08:30 +00001320__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1321 if (user_lock) {
1322 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1323 case 0:
1324 break;
1325#if KMP_USE_FUTEX
1326 case locktag_futex:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001327 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001328#endif
1329 case locktag_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001330 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001331#if KMP_USE_TSX
1332 case locktag_hle:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001333 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001334#endif
1335 default:
Joachim Protze2b46d302019-01-15 15:36:53 +00001336 return kmp_mutex_impl_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001337 }
1338 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1339 }
1340 KMP_ASSERT(ilock);
1341 switch (ilock->type) {
1342#if KMP_USE_TSX
1343 case locktag_adaptive:
1344 case locktag_rtm:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001345 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001346#endif
1347 case locktag_nested_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001348 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001349#if KMP_USE_FUTEX
1350 case locktag_nested_futex:
1351#endif
1352 case locktag_ticket:
1353 case locktag_queuing:
1354 case locktag_drdpa:
1355 case locktag_nested_ticket:
1356 case locktag_nested_queuing:
1357 case locktag_nested_drdpa:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001358 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001359 default:
Joachim Protze2b46d302019-01-15 15:36:53 +00001360 return kmp_mutex_impl_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001361 }
1362}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001363#else
Joachim Protze82e94a52017-11-01 10:08:30 +00001364// For locks without dynamic binding
Joachim Protze1b2bd262018-01-17 10:06:01 +00001365static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
Joachim Protze82e94a52017-11-01 10:08:30 +00001366 switch (__kmp_user_lock_kind) {
1367 case lk_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001368 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001369#if KMP_USE_FUTEX
1370 case lk_futex:
1371#endif
1372 case lk_ticket:
1373 case lk_queuing:
1374 case lk_drdpa:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001375 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001376#if KMP_USE_TSX
1377 case lk_hle:
1378 case lk_rtm:
1379 case lk_adaptive:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001380 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001381#endif
1382 default:
Joachim Protze2b46d302019-01-15 15:36:53 +00001383 return kmp_mutex_impl_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001384 }
1385}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001386#endif // KMP_USE_DYNAMIC_LOCK
1387#endif // OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze82e94a52017-11-01 10:08:30 +00001388
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001389/*!
1390@ingroup WORK_SHARING
1391@param loc source location information.
1392@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001393@param crit identity of the critical section. This could be a pointer to a lock
1394associated with the critical section, or some other suitably unique value.
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001395@param hint the lock hint.
1396
Jonathan Peyton30419822017-05-12 18:01:32 +00001397Enter code protected by a `critical` construct with a hint. The hint value is
1398used to suggest a lock implementation. This function blocks until the executing
1399thread can enter the critical section unless the hint suggests use of
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001400speculative execution and the hardware supports it.
1401*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001402void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
Jonathan Peytona2f6eff2018-09-07 18:46:40 +00001403 kmp_critical_name *crit, uint32_t hint) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001404 KMP_COUNT_BLOCK(OMP_CRITICAL);
1405 kmp_user_lock_p lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001406#if OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001407 ompt_state_t prev_state = ompt_state_undefined;
Joachim Protze82e94a52017-11-01 10:08:30 +00001408 ompt_thread_info_t ti;
1409  // This is the case if we were called from __kmpc_critical:
1410 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1411 if (!codeptr)
1412 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1413#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001414
Jonathan Peyton30419822017-05-12 18:01:32 +00001415  KC_TRACE(10, ("__kmpc_critical_with_hint: called T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001416
Jonathan Peyton30419822017-05-12 18:01:32 +00001417 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1418 // Check if it is initialized.
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001419 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
Jonathan Peyton30419822017-05-12 18:01:32 +00001420 if (*lk == 0) {
1421 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1422 if (KMP_IS_D_LOCK(lckseq)) {
1423 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1424 KMP_GET_D_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001425 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001426 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001427 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001428 }
1429  // Branch to access the actual lock object and its set operation. This
1430  // branching is unavoidable since this lock initialization does not follow
1431  // the normal dispatch path (the lock table is not used).
1432 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1433 lck = (kmp_user_lock_p)lk;
1434 if (__kmp_env_consistency_check) {
1435 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1436 __kmp_map_hint_to_lock(hint));
1437 }
1438#if USE_ITT_BUILD
1439 __kmp_itt_critical_acquiring(lck);
1440#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001441#if OMPT_SUPPORT && OMPT_OPTIONAL
1442 if (ompt_enabled.enabled) {
1443 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1444 /* OMPT state update */
1445 prev_state = ti.state;
Joachim Protze4109d562019-05-20 14:21:42 +00001446 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001447 ti.state = ompt_state_wait_critical;
Joachim Protze82e94a52017-11-01 10:08:30 +00001448
1449 /* OMPT event callback */
1450 if (ompt_enabled.ompt_callback_mutex_acquire) {
1451 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1452 ompt_mutex_critical, (unsigned int)hint,
Joachim Protze4109d562019-05-20 14:21:42 +00001453 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1454 codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001455 }
1456 }
1457#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001458#if KMP_USE_INLINED_TAS
1459 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1460 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1461 } else
1462#elif KMP_USE_INLINED_FUTEX
1463 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1464 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1465 } else
1466#endif
1467 {
1468 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1469 }
1470 } else {
1471 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1472 lck = ilk->lock;
1473 if (__kmp_env_consistency_check) {
1474 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1475 __kmp_map_hint_to_lock(hint));
1476 }
1477#if USE_ITT_BUILD
1478 __kmp_itt_critical_acquiring(lck);
1479#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001480#if OMPT_SUPPORT && OMPT_OPTIONAL
1481 if (ompt_enabled.enabled) {
1482 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1483 /* OMPT state update */
1484 prev_state = ti.state;
Joachim Protze4109d562019-05-20 14:21:42 +00001485 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001486 ti.state = ompt_state_wait_critical;
Joachim Protze82e94a52017-11-01 10:08:30 +00001487
1488 /* OMPT event callback */
1489 if (ompt_enabled.ompt_callback_mutex_acquire) {
1490 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1491 ompt_mutex_critical, (unsigned int)hint,
Joachim Protze4109d562019-05-20 14:21:42 +00001492 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1493 codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001494 }
1495 }
1496#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001497 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1498 }
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001499 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001500
Jim Cownie5e8470a2013-09-27 10:38:44 +00001501#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001502 __kmp_itt_critical_acquired(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001503#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001504#if OMPT_SUPPORT && OMPT_OPTIONAL
1505 if (ompt_enabled.enabled) {
1506 /* OMPT state update */
1507 ti.state = prev_state;
1508 ti.wait_id = 0;
1509
1510 /* OMPT event callback */
1511 if (ompt_enabled.ompt_callback_mutex_acquired) {
1512 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze4109d562019-05-20 14:21:42 +00001513 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001514 }
1515 }
1516#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001517
Jonathan Peyton30419822017-05-12 18:01:32 +00001518 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1519  KA_TRACE(15, ("__kmpc_critical_with_hint: done T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001520} // __kmpc_critical_with_hint
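// A hedged sketch of lowering '#pragma omp critical ... hint(...)' onto this
// entry point; the static tag and the names below are illustrative
// assumptions, not emitted code.
static kmp_critical_name example_hinted_tag;

void example_hinted_critical(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical_with_hint(loc, gtid, &example_hinted_tag,
                            omp_lock_hint_speculative);
  // ... protected user code ...
  __kmpc_end_critical(loc, gtid, &example_hinted_tag);
}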
1521
1522#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001523
1524/*!
1525@ingroup WORK_SHARING
1526@param loc source location information.
1527@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001528@param crit identity of the critical section. This could be a pointer to a lock
1529associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001530
1531Leave a critical section, releasing any lock that was held during its execution.
1532*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001533void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1534 kmp_critical_name *crit) {
1535 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001536
Jonathan Peyton30419822017-05-12 18:01:32 +00001537 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001538
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001539#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00001540 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1541 lck = (kmp_user_lock_p)crit;
1542 KMP_ASSERT(lck != NULL);
1543 if (__kmp_env_consistency_check) {
1544 __kmp_pop_sync(global_tid, ct_critical, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001545 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001546#if USE_ITT_BUILD
1547 __kmp_itt_critical_releasing(lck);
1548#endif
1549#if KMP_USE_INLINED_TAS
1550 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1551 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1552 } else
1553#elif KMP_USE_INLINED_FUTEX
1554 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1555 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1556 } else
1557#endif
1558 {
1559 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1560 }
1561 } else {
1562 kmp_indirect_lock_t *ilk =
1563 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1564 KMP_ASSERT(ilk != NULL);
1565 lck = ilk->lock;
1566 if (__kmp_env_consistency_check) {
1567 __kmp_pop_sync(global_tid, ct_critical, loc);
1568 }
1569#if USE_ITT_BUILD
1570 __kmp_itt_critical_releasing(lck);
1571#endif
1572 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1573 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001574
1575#else // KMP_USE_DYNAMIC_LOCK
1576
Jonathan Peyton30419822017-05-12 18:01:32 +00001577 if ((__kmp_user_lock_kind == lk_tas) &&
1578 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1579 lck = (kmp_user_lock_p)crit;
1580 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001581#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001582 else if ((__kmp_user_lock_kind == lk_futex) &&
1583 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1584 lck = (kmp_user_lock_p)crit;
1585 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001586#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001587 else { // ticket, queuing or drdpa
1588 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1589 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001590
Jonathan Peyton30419822017-05-12 18:01:32 +00001591 KMP_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001592
Jonathan Peyton30419822017-05-12 18:01:32 +00001593 if (__kmp_env_consistency_check)
1594 __kmp_pop_sync(global_tid, ct_critical, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001595
1596#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001597 __kmp_itt_critical_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001598#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001599 // Value of 'crit' should be good for using as a critical_id of the critical
1600 // section directive.
1601 __kmp_release_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001602
Joachim Protze82e94a52017-11-01 10:08:30 +00001603#endif // KMP_USE_DYNAMIC_LOCK
1604
1605#if OMPT_SUPPORT && OMPT_OPTIONAL
1606 /* OMPT release event triggers after lock is released; place here to trigger
1607 * for all #if branches */
1608 OMPT_STORE_RETURN_ADDRESS(global_tid);
1609 if (ompt_enabled.ompt_callback_mutex_released) {
1610 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze4109d562019-05-20 14:21:42 +00001611 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1612 OMPT_LOAD_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001613 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001614#endif
1615
Jonathan Peyton30419822017-05-12 18:01:32 +00001616 KMP_POP_PARTITIONED_TIMER();
1617 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001618}
1619
1620/*!
1621@ingroup SYNCHRONIZATION
1622@param loc source location information
1623@param global_tid thread id.
1624@return one if the thread should execute the master block, zero otherwise
1625
Jonathan Peyton30419822017-05-12 18:01:32 +00001626Start execution of a combined barrier and master. The barrier is executed inside
1627this function.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001628*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001629kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1630 int status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001631
Jonathan Peyton30419822017-05-12 18:01:32 +00001632 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001633
Jonathan Peyton30419822017-05-12 18:01:32 +00001634 if (!TCR_4(__kmp_init_parallel))
1635 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001636
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00001637#if OMP_50_ENABLED
1638 __kmp_resume_if_soft_paused();
1639#endif
1640
Jonathan Peyton30419822017-05-12 18:01:32 +00001641 if (__kmp_env_consistency_check)
1642 __kmp_check_barrier(global_tid, ct_barrier, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643
Joachim Protze82e94a52017-11-01 10:08:30 +00001644#if OMPT_SUPPORT
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001645 ompt_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001646 if (ompt_enabled.enabled) {
1647 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001648 if (ompt_frame->enter_frame.ptr == NULL)
1649 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001650 OMPT_STORE_RETURN_ADDRESS(global_tid);
1651 }
1652#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001653#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001654 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001655#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001656 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001657#if OMPT_SUPPORT && OMPT_OPTIONAL
1658 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001659 ompt_frame->enter_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001660 }
1661#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001662
Jonathan Peyton30419822017-05-12 18:01:32 +00001663 return (status != 0) ? 0 : 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001664}
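// A hedged sketch of the calling convention the documentation implies: every
// thread calls __kmpc_barrier_master, and only the thread that gets 1 back
// runs the master block and then releases the waiters.
void example_barrier_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_barrier_master(loc, gtid)) {
    // ... master-only code ...
    __kmpc_end_barrier_master(loc, gtid); // releases the waiting threads
  }
}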
1665
1666/*!
1667@ingroup SYNCHRONIZATION
1668@param loc source location information
1669@param global_tid thread id.
1670
1671Complete the execution of a combined barrier and master. This function should
1672only be called at the completion of the <tt>master</tt> code. Other threads will
1673still be waiting at the barrier and this call releases them.
1674*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001675void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1676 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001677
Jonathan Peyton30419822017-05-12 18:01:32 +00001678 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001679}
1680
1681/*!
1682@ingroup SYNCHRONIZATION
1683@param loc source location information
1684@param global_tid thread id.
1685@return one if the thread should execute the master block, zero otherwise
1686
1687Start execution of a combined barrier and master(nowait) construct.
1688The barrier is executed inside this function.
1689There is no equivalent "end" function, since the nowait semantics leave no threads waiting to be released after the master code completes.
1690*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001691kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1692 kmp_int32 ret;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001693
Jonathan Peyton30419822017-05-12 18:01:32 +00001694 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695
Jonathan Peyton30419822017-05-12 18:01:32 +00001696 if (!TCR_4(__kmp_init_parallel))
1697 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001698
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00001699#if OMP_50_ENABLED
1700 __kmp_resume_if_soft_paused();
1701#endif
1702
Jonathan Peyton30419822017-05-12 18:01:32 +00001703 if (__kmp_env_consistency_check) {
1704 if (loc == 0) {
1705 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
Jim Cownie5e8470a2013-09-27 10:38:44 +00001706 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001707 __kmp_check_barrier(global_tid, ct_barrier, loc);
1708 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709
Joachim Protze82e94a52017-11-01 10:08:30 +00001710#if OMPT_SUPPORT
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001711 ompt_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001712 if (ompt_enabled.enabled) {
1713 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001714 if (ompt_frame->enter_frame.ptr == NULL)
1715 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001716 OMPT_STORE_RETURN_ADDRESS(global_tid);
1717 }
1718#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001719#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001720 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001721#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001722 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001723#if OMPT_SUPPORT && OMPT_OPTIONAL
1724 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001725 ompt_frame->enter_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001726 }
1727#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001728
Jonathan Peyton30419822017-05-12 18:01:32 +00001729 ret = __kmpc_master(loc, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001730
Jonathan Peyton30419822017-05-12 18:01:32 +00001731 if (__kmp_env_consistency_check) {
1732    /* there is no __kmpc_end_master call, so the (stats) */
1733    /* actions of __kmpc_end_master are done here */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001734
Jonathan Peyton30419822017-05-12 18:01:32 +00001735 if (global_tid < 0) {
1736 KMP_WARNING(ThreadIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001737 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001738 if (ret) {
1739 /* only one thread should do the pop since only */
1740 /* one did the push (see __kmpc_master()) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001741
Jonathan Peyton30419822017-05-12 18:01:32 +00001742 __kmp_pop_sync(global_tid, ct_master, loc);
1743 }
1744 }
1745
1746 return (ret);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001747}
1748
1749/* The BARRIER for a SINGLE process section is always explicit */
1750/*!
1751@ingroup WORK_SHARING
1752@param loc source location information
1753@param global_tid global thread number
1754@return One if this thread should execute the single construct, zero otherwise.
1755
1756Test whether to execute a <tt>single</tt> construct.
Jonathan Peyton30419822017-05-12 18:01:32 +00001757There are no implicit barriers in the two "single" calls; rather, the compiler
1758should introduce an explicit barrier if it is required.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001759*/
1760
Jonathan Peyton30419822017-05-12 18:01:32 +00001761kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1762 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
Jonathan Peyton30138252016-03-03 21:21:05 +00001763
Jonathan Peyton30419822017-05-12 18:01:32 +00001764 if (rc) {
1765 // We are going to execute the single statement, so we should count it.
1766 KMP_COUNT_BLOCK(OMP_SINGLE);
1767 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1768 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001769
Joachim Protze82e94a52017-11-01 10:08:30 +00001770#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001771 kmp_info_t *this_thr = __kmp_threads[global_tid];
1772 kmp_team_t *team = this_thr->th.th_team;
1773 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001774
Joachim Protze82e94a52017-11-01 10:08:30 +00001775 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001776 if (rc) {
Joachim Protze82e94a52017-11-01 10:08:30 +00001777 if (ompt_enabled.ompt_callback_work) {
1778 ompt_callbacks.ompt_callback(ompt_callback_work)(
1779 ompt_work_single_executor, ompt_scope_begin,
1780 &(team->t.ompt_team_info.parallel_data),
1781 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1782 1, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001783 }
1784 } else {
Joachim Protze82e94a52017-11-01 10:08:30 +00001785 if (ompt_enabled.ompt_callback_work) {
1786 ompt_callbacks.ompt_callback(ompt_callback_work)(
1787 ompt_work_single_other, ompt_scope_begin,
1788 &(team->t.ompt_team_info.parallel_data),
1789 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1790 1, OMPT_GET_RETURN_ADDRESS(0));
1791 ompt_callbacks.ompt_callback(ompt_callback_work)(
1792 ompt_work_single_other, ompt_scope_end,
1793 &(team->t.ompt_team_info.parallel_data),
1794 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1795 1, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001796 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001797 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001798 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001799#endif
1800
Jonathan Peyton30419822017-05-12 18:01:32 +00001801 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001802}
1803
1804/*!
1805@ingroup WORK_SHARING
1806@param loc source location information
1807@param global_tid global thread number
1808
1809Mark the end of a <tt>single</tt> construct. This function should
1810only be called by the thread that executed the block of code protected
1811by the `single` construct.
1812*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001813void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1814 __kmp_exit_single(global_tid);
1815 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001816
Joachim Protze82e94a52017-11-01 10:08:30 +00001817#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001818 kmp_info_t *this_thr = __kmp_threads[global_tid];
1819 kmp_team_t *team = this_thr->th.th_team;
1820 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001821
Joachim Protze82e94a52017-11-01 10:08:30 +00001822 if (ompt_enabled.ompt_callback_work) {
1823 ompt_callbacks.ompt_callback(ompt_callback_work)(
1824 ompt_work_single_executor, ompt_scope_end,
1825 &(team->t.ompt_team_info.parallel_data),
1826 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1827 OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001828 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001829#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001830}
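// A hedged sketch of lowering '#pragma omp single' onto the two calls above,
// assuming the __kmpc_barrier entry point declared in kmp.h; per the
// documentation the compiler adds the trailing barrier itself unless the
// construct carries 'nowait'.
void example_single(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {
    // ... code executed by exactly one thread ...
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // omitted when 'nowait' is specified
}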
1831
1832/*!
1833@ingroup WORK_SHARING
1834@param loc Source location
1835@param global_tid Global thread id
1836
1837Mark the end of a statically scheduled loop.
1838*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001839void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001840 KMP_POP_PARTITIONED_TIMER();
Jonathan Peyton30419822017-05-12 18:01:32 +00001841 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001842
Joachim Protze82e94a52017-11-01 10:08:30 +00001843#if OMPT_SUPPORT && OMPT_OPTIONAL
1844 if (ompt_enabled.ompt_callback_work) {
Joachim Protze489cdb72018-09-10 14:34:54 +00001845 ompt_work_t ompt_work_type = ompt_work_loop;
Jonathan Peyton30419822017-05-12 18:01:32 +00001846 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001847 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1848 // Determine workshare type
1849 if (loc != NULL) {
1850 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1851 ompt_work_type = ompt_work_loop;
1852 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1853 ompt_work_type = ompt_work_sections;
1854 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1855 ompt_work_type = ompt_work_distribute;
1856 } else {
Joachim Protze91732472017-11-10 21:07:01 +00001857      // Use the default set above;
1858      // a warning about this case is provided in __kmpc_for_static_init.
Joachim Protze82e94a52017-11-01 10:08:30 +00001859 }
1860 KMP_DEBUG_ASSERT(ompt_work_type);
1861 }
1862 ompt_callbacks.ompt_callback(ompt_callback_work)(
1863 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1864 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001865 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001866#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001867 if (__kmp_env_consistency_check)
1868 __kmp_pop_workshare(global_tid, ct_pdo, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001869}
1870
Jonathan Peyton30419822017-05-12 18:01:32 +00001871// User routines which take C-style arguments (call by value)
1872// different from the Fortran equivalent routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00001873
Jonathan Peyton30419822017-05-12 18:01:32 +00001874void ompc_set_num_threads(int arg) {
1875 // !!!!! TODO: check the per-task binding
1876 __kmp_set_num_threads(arg, __kmp_entry_gtid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00001877}
1878
Jonathan Peyton30419822017-05-12 18:01:32 +00001879void ompc_set_dynamic(int flag) {
1880 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001881
Jonathan Peyton30419822017-05-12 18:01:32 +00001882 /* For the thread-private implementation of the internal controls */
1883 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001884
Jonathan Peyton30419822017-05-12 18:01:32 +00001885 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001886
Jonathan Peyton30419822017-05-12 18:01:32 +00001887 set__dynamic(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001888}
1889
Jonathan Peyton30419822017-05-12 18:01:32 +00001890void ompc_set_nested(int flag) {
1891 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001892
Jonathan Peyton30419822017-05-12 18:01:32 +00001893 /* For the thread-private internal controls implementation */
1894 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001895
Jonathan Peyton30419822017-05-12 18:01:32 +00001896 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001897
Jonathan Peyton76b45e82019-02-28 20:47:21 +00001898 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001899}
1900
Jonathan Peyton30419822017-05-12 18:01:32 +00001901void ompc_set_max_active_levels(int max_active_levels) {
1902 /* TO DO */
1903 /* we want per-task implementation of this internal control */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001904
Jonathan Peyton30419822017-05-12 18:01:32 +00001905 /* For the per-thread internal controls implementation */
1906 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907}
1908
Jonathan Peyton30419822017-05-12 18:01:32 +00001909void ompc_set_schedule(omp_sched_t kind, int modifier) {
1910 // !!!!! TODO: check the per-task binding
1911 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001912}
1913
Jonathan Peyton30419822017-05-12 18:01:32 +00001914int ompc_get_ancestor_thread_num(int level) {
1915 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001916}
1917
Jonathan Peyton30419822017-05-12 18:01:32 +00001918int ompc_get_team_size(int level) {
1919 return __kmp_get_team_size(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001920}
1921
Jonathan Peyton6d88e042018-12-13 23:14:24 +00001922#if OMP_50_ENABLED
1923/* OpenMP 5.0 Affinity Format API */
1924
1925void ompc_set_affinity_format(char const *format) {
1926 if (!__kmp_init_serial) {
1927 __kmp_serial_initialize();
1928 }
1929 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1930 format, KMP_STRLEN(format) + 1);
1931}
1932
1933size_t ompc_get_affinity_format(char *buffer, size_t size) {
1934 size_t format_size;
1935 if (!__kmp_init_serial) {
1936 __kmp_serial_initialize();
1937 }
1938 format_size = KMP_STRLEN(__kmp_affinity_format);
1939 if (buffer && size) {
1940 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1941 format_size + 1);
1942 }
1943 return format_size;
1944}
1945
1946void ompc_display_affinity(char const *format) {
1947 int gtid;
1948 if (!TCR_4(__kmp_init_middle)) {
1949 __kmp_middle_initialize();
1950 }
1951 gtid = __kmp_get_gtid();
1952 __kmp_aux_display_affinity(gtid, format);
1953}
1954
1955size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1956 char const *format) {
1957 int gtid;
1958 size_t num_required;
1959 kmp_str_buf_t capture_buf;
1960 if (!TCR_4(__kmp_init_middle)) {
1961 __kmp_middle_initialize();
1962 }
1963 gtid = __kmp_get_gtid();
1964 __kmp_str_buf_init(&capture_buf);
1965 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1966 if (buffer && buf_size) {
1967 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1968 capture_buf.used + 1);
1969 }
1970 __kmp_str_buf_free(&capture_buf);
1971 return num_required;
1972}
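// A hedged sketch of the two-call sizing pattern these entry points support:
// since the copy is skipped when buffer/size are null, the first call only
// reports the required length. The "%P ... %A" format fields are assumptions
// taken from the OpenMP 5.0 affinity-format field list.
#include <vector>

void example_capture_affinity() {
  size_t needed = ompc_capture_affinity(NULL, 0, "%P on OS procs %A");
  std::vector<char> buf(needed + 1); // + 1 for the terminating NUL
  ompc_capture_affinity(buf.data(), buf.size(), "%P on OS procs %A");
  // buf.data() now holds this thread's expanded affinity string
}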
1973#endif /* OMP_50_ENABLED */
1974
Jonathan Peyton30419822017-05-12 18:01:32 +00001975void kmpc_set_stacksize(int arg) {
1976 // __kmp_aux_set_stacksize initializes the library if needed
1977 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001978}
1979
Jonathan Peyton30419822017-05-12 18:01:32 +00001980void kmpc_set_stacksize_s(size_t arg) {
1981 // __kmp_aux_set_stacksize initializes the library if needed
1982 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001983}
1984
Jonathan Peyton30419822017-05-12 18:01:32 +00001985void kmpc_set_blocktime(int arg) {
1986 int gtid, tid;
1987 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988
Jonathan Peyton30419822017-05-12 18:01:32 +00001989 gtid = __kmp_entry_gtid();
1990 tid = __kmp_tid_from_gtid(gtid);
1991 thread = __kmp_thread_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992
Jonathan Peyton30419822017-05-12 18:01:32 +00001993 __kmp_aux_set_blocktime(arg, thread, tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994}
1995
Jonathan Peyton30419822017-05-12 18:01:32 +00001996void kmpc_set_library(int arg) {
1997 // __kmp_user_set_library initializes the library if needed
1998 __kmp_user_set_library((enum library_type)arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999}
2000
Jonathan Peyton30419822017-05-12 18:01:32 +00002001void kmpc_set_defaults(char const *str) {
2002 // __kmp_aux_set_defaults initializes the library if needed
2003 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002004}
2005
Jonathan Peyton30419822017-05-12 18:01:32 +00002006void kmpc_set_disp_num_buffers(int arg) {
2007 // ignore after initialization because some teams have already
2008 // allocated dispatch buffers
2009 if (__kmp_init_serial == 0 && arg > 0)
2010 __kmp_dispatch_num_buffers = arg;
Jonathan Peyton067325f2016-05-31 19:01:15 +00002011}
2012
Jonathan Peyton30419822017-05-12 18:01:32 +00002013int kmpc_set_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00002014#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00002015 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002016#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002017 if (!TCR_4(__kmp_init_middle)) {
2018 __kmp_middle_initialize();
2019 }
2020 return __kmp_aux_set_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021#endif
2022}
2023
Jonathan Peyton30419822017-05-12 18:01:32 +00002024int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00002025#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00002026 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002028 if (!TCR_4(__kmp_init_middle)) {
2029 __kmp_middle_initialize();
2030 }
2031 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002032#endif
2033}
2034
Jonathan Peyton30419822017-05-12 18:01:32 +00002035int kmpc_get_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00002036#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00002037 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002038#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002039 if (!TCR_4(__kmp_init_middle)) {
2040 __kmp_middle_initialize();
2041 }
2042 return __kmp_aux_get_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002043#endif
2044}
2045
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046/* -------------------------------------------------------------------------- */
2047/*!
2048@ingroup THREADPRIVATE
2049@param loc source location information
2050@param gtid global thread number
2051@param cpy_size size of the cpy_data buffer
2052@param cpy_data pointer to data to be copied
2053@param cpy_func helper function to call for copying data
2054@param didit flag variable: 1=single thread; 0=not single thread
2055
Jonathan Peyton30419822017-05-12 18:01:32 +00002056__kmpc_copyprivate implements the interface for the private data broadcast
2057needed for the copyprivate clause associated with a single region in an
2058OpenMP<sup>*</sup> program (both C and Fortran).
Jim Cownie5e8470a2013-09-27 10:38:44 +00002059All threads participating in the parallel region call this routine.
Jonathan Peyton30419822017-05-12 18:01:32 +00002060One of the threads (called the single thread) should have the <tt>didit</tt>
2061variable set to 1 and all other threads should have that variable set to 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002062All threads pass a pointer to a data buffer (cpy_data) that they have built.
2063
Jonathan Peyton30419822017-05-12 18:01:32 +00002064The OpenMP specification forbids the use of nowait on the single region when a
2065copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2066barrier internally to avoid race conditions, so the code generation for the
2067single region should avoid generating a barrier after the call to @ref
2068__kmpc_copyprivate.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002069
2070The <tt>gtid</tt> parameter is the global thread id for the current thread.
2071The <tt>loc</tt> parameter is a pointer to source location information.
2072
Jonathan Peyton30419822017-05-12 18:01:32 +00002073Internal implementation: The single thread will first copy its descriptor
2074address (cpy_data) to a team-private location, then the other threads will each
2075call the function pointed to by the parameter cpy_func, which carries out the
2076copy by copying the data using the cpy_data buffer.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002077
Jonathan Peyton30419822017-05-12 18:01:32 +00002078The cpy_func routine used for the copy and the contents of the data area defined
2079by cpy_data and cpy_size may be built in any fashion that will allow the copy
2080to be done. For instance, the cpy_data buffer can hold the actual data to be
2081copied or it may hold a list of pointers to the data. The cpy_func routine must
2082interpret the cpy_data buffer appropriately.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002083
2084The interface to cpy_func is as follows:
2085@code
2086void cpy_func( void *destination, void *source )
2087@endcode
2088where void *destination is the cpy_data pointer for the thread being copied to
2089and void *source is the cpy_data pointer for the thread being copied from.
2090*/
Jonathan Peyton30419822017-05-12 18:01:32 +00002091void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2092 void *cpy_data, void (*cpy_func)(void *, void *),
2093 kmp_int32 didit) {
2094 void **data_ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002095
Jonathan Peyton30419822017-05-12 18:01:32 +00002096 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002097
Jonathan Peyton30419822017-05-12 18:01:32 +00002098 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002099
Jonathan Peyton30419822017-05-12 18:01:32 +00002100 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002101
Jonathan Peyton30419822017-05-12 18:01:32 +00002102 if (__kmp_env_consistency_check) {
2103 if (loc == 0) {
2104 KMP_WARNING(ConstructIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002105 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002106 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002107
Jonathan Peyton30419822017-05-12 18:01:32 +00002108 // ToDo: Optimize the following two barriers into some kind of split barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002109
Jonathan Peyton30419822017-05-12 18:01:32 +00002110 if (didit)
2111 *data_ptr = cpy_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002112
Joachim Protze82e94a52017-11-01 10:08:30 +00002113#if OMPT_SUPPORT
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002114 ompt_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00002115 if (ompt_enabled.enabled) {
2116 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002117 if (ompt_frame->enter_frame.ptr == NULL)
2118 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00002119 OMPT_STORE_RETURN_ADDRESS(gtid);
2120 }
2121#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002122/* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002123#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002124 __kmp_threads[gtid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002125#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002126 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002127
Jonathan Peyton30419822017-05-12 18:01:32 +00002128 if (!didit)
2129 (*cpy_func)(cpy_data, *data_ptr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002130
Jonathan Peyton30419822017-05-12 18:01:32 +00002131// Consider next barrier a user-visible barrier for barrier region boundaries
2132// Nesting checks are already handled by the single construct checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002133
Joachim Protze82e94a52017-11-01 10:08:30 +00002134#if OMPT_SUPPORT
2135 if (ompt_enabled.enabled) {
2136 OMPT_STORE_RETURN_ADDRESS(gtid);
2137 }
2138#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002139#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002140 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2141// tasks can overwrite the location)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002142#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002143 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00002144#if OMPT_SUPPORT && OMPT_OPTIONAL
2145 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002146 ompt_frame->enter_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00002147 }
2148#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002149}
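// A hedged sketch of lowering 'single copyprivate(x)' onto this entry point:
// cpy_data points at the variable and the helper matches the documented
// cpy_func interface. The names are illustrative, not emitted code.
static void example_copy_int(void *dst, void *src) {
  *(int *)dst = *(int *)src; // copy x from the single thread's buffer
}

void example_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 didit = __kmpc_single(loc, gtid); // 1 only on the executing thread
  if (didit) {
    *x = 42; // ... the single thread produces the value ...
    __kmpc_end_single(loc, gtid);
  }
  // No extra barrier is generated: __kmpc_copyprivate synchronizes internally.
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, example_copy_int, didit);
}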
2150
2151/* -------------------------------------------------------------------------- */
2152
Jonathan Peyton30419822017-05-12 18:01:32 +00002153#define INIT_LOCK __kmp_init_user_lock_with_checks
2154#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2155#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2156#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2157#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2158#define ACQUIRE_NESTED_LOCK_TIMED \
2159 __kmp_acquire_nested_user_lock_with_checks_timed
2160#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2161#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2162#define TEST_LOCK __kmp_test_user_lock_with_checks
2163#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2164#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2165#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002166
Jonathan Peyton30419822017-05-12 18:01:32 +00002167// TODO: Make check abort messages use location info & pass it into
2168// with_checks routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00002169
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002170#if KMP_USE_DYNAMIC_LOCK
2171
2172// internal lock initializer
Jonathan Peyton30419822017-05-12 18:01:32 +00002173static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2174 kmp_dyna_lockseq_t seq) {
2175 if (KMP_IS_D_LOCK(seq)) {
2176 KMP_INIT_D_LOCK(lock, seq);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002177#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002178 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002179#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002180 } else {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002181 KMP_INIT_I_LOCK(lock, seq);
2182#if USE_ITT_BUILD
2183 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2184 __kmp_itt_lock_creating(ilk->lock, loc);
2185#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002186 }
2187}
2188
2189// internal nest lock initializer
2190static __forceinline void
2191__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2192 kmp_dyna_lockseq_t seq) {
2193#if KMP_USE_TSX
2194 // Don't have nested lock implementation for speculative locks
2195 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2196 seq = __kmp_user_lock_seq;
2197#endif
2198 switch (seq) {
2199 case lockseq_tas:
2200 seq = lockseq_nested_tas;
2201 break;
2202#if KMP_USE_FUTEX
2203 case lockseq_futex:
2204 seq = lockseq_nested_futex;
2205 break;
2206#endif
2207 case lockseq_ticket:
2208 seq = lockseq_nested_ticket;
2209 break;
2210 case lockseq_queuing:
2211 seq = lockseq_nested_queuing;
2212 break;
2213 case lockseq_drdpa:
2214 seq = lockseq_nested_drdpa;
2215 break;
2216 default:
2217 seq = lockseq_nested_queuing;
2218 }
2219 KMP_INIT_I_LOCK(lock, seq);
2220#if USE_ITT_BUILD
2221 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2222 __kmp_itt_lock_creating(ilk->lock, loc);
2223#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002224}
2225
/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

/* initialize the nest lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}
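
// Editorial usage sketch (not part of the runtime): user code reaches the two
// entry points above through the standard omp.h routines, which stash the
// caller's return address and forward the hint unchanged.
#if 0
#include <omp.h>

static omp_lock_t demo_lock;
static omp_nest_lock_t demo_nest_lock;

static void demo_init_with_hints(void) {
  // Forwarded to __kmpc_init_lock_with_hint / __kmpc_init_nest_lock_with_hint.
  omp_init_lock_with_hint(&demo_lock, omp_lock_hint_speculative);
  omp_init_nest_lock_with_hint(&demo_nest_lock, omp_lock_hint_contended);
}
#endif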

#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
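
// Layout note (editorial): the TAS/futex size checks above decide whether the
// lock state fits directly inside the user's omp_lock_t storage. When
// sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE the user's variable is used as
// the lock itself; otherwise __kmp_user_lock_allocate() heap-allocates the
// lock and the user's variable holds only the pointer that
// __kmp_lookup_user_lock() later resolves.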

/* initialize the nest lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

/* destroy the lock */
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the nest lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock
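
// Usage note (editorial): the OpenMP spec requires a lock to be unlocked and
// no longer in use when it is destroyed, so a well-formed teardown performs
// the final unset before the destroy:
#if 0
#include <omp.h>

static void demo_teardown(omp_nest_lock_t *nl) {
  omp_unset_nest_lock(nl); // nesting depth must reach zero first
  omp_destroy_nest_lock(nl); // forwarded to __kmpc_destroy_nest_lock
}
#endif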

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
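
// Caller's view (editorial sketch): __kmpc_set_lock/__kmpc_unset_lock back
// the blocking omp_set_lock/omp_unset_lock pair, so a typical user-level
// critical region looks like:
#if 0
#include <omp.h>

static omp_lock_t counter_lock; // assume omp_init_lock() already ran

static void demo_increment(int *counter) {
  omp_set_lock(&counter_lock); // blocks until the lock is acquired
  ++*counter; // protected update
  omp_unset_lock(&counter_lock);
}
#endif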

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
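
// Nesting semantics (editorial note): the owning thread may re-acquire a
// nestable lock; only the first acquisition returns KMP_LOCK_ACQUIRED_FIRST
// (reported to OMPT as "lock_first" above), later ones merely deepen the
// nesting count:
#if 0
#include <omp.h>

static void demo_nesting(omp_nest_lock_t *nl) {
  omp_set_nest_lock(nl); // depth 1: mutex_acquired ("lock_first")
  omp_set_nest_lock(nl); // depth 2: nest_lock scope_begin ("lock_next")
  omp_unset_nest_lock(nl); // depth 1
  omp_unset_nest_lock(nl); // depth 0: lock actually released
}
#endif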

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_unset_lock:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* release the nest lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_unset_nest_lock:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
              codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
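
// Caller's view (editorial sketch): __kmpc_test_lock backs omp_test_lock,
// which never blocks, so callers can make progress while the lock is busy:
#if 0
#include <omp.h>

static void demo_try_lock(omp_lock_t *l, long *fallback_work) {
  while (!omp_test_lock(l)) { // nonzero only when the lock was acquired
    ++*fallback_work; // do something useful instead of blocking
  }
  /* ... protected work ... */
  omp_unset_lock(l);
}
#endif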

/* try to acquire the nest lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
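
// Return-value note (editorial): omp_test_nest_lock, backed by the routine
// above, returns the new nesting depth on success and 0 on failure, which is
// why the OMPT code above distinguishes rc == 1 (first acquisition) from
// rc > 1 (re-acquisition):
#if 0
#include <omp.h>

static void demo_test_nest(omp_nest_lock_t *nl) {
  int depth = omp_test_nest_lock(nl); // 1 if newly acquired, 0 if busy
  if (depth > 0) {
    /* owned; depth counts how many unsets this thread now owes */
    omp_unset_nest_lock(nl);
  }
}
#endif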

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h
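
// Compiler's-eye sketch (editorial; real codegen varies by compiler): for a
// "reduction(+:sum) nowait" clause the compiler emits a call to
// __kmpc_reduce_nowait (defined below) and dispatches on its result. The
// helper name emitted_by_compiler and the atomic branch body are illustrative
// only.
#if 0
static void reduce_fn(void *lhs, void *rhs) { // combines two partial results
  *(double *)lhs += *(double *)rhs;
}

static void emitted_by_compiler(ident_t *loc, kmp_int32 gtid, double *sum,
                                double local_sum, kmp_critical_name *crit) {
  switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(double), &local_sum,
                               reduce_fn, crit)) {
  case 1: // this thread combines the partial results, then closes the block
    *sum += local_sum;
    __kmpc_end_reduce_nowait(loc, gtid, crit);
    break;
  case 2: // every thread combines its own value via atomics
    /* atomic add of local_sum into *sum */
    break;
  default: // 0: nothing to do for this thread
    break;
  }
}
#endif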

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block

#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is a reduction at the teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
#endif

/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // packed_reduction_method value will be reused by __kmp_end_reduce*
  // functions, so the value should be kept in a variable
  // the variable should be either a construct-specific or thread-specific
  // property, not a team-specific property
  // (a thread can reach the next reduce block on the next construct, and the
  // reduce method may differ on the next construct)
  // an ident_t "loc" parameter could be used as a construct-specific property
  // (what if loc == 0?)
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
  // a thread-specific variable is better regarding the two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes the 'determine' and 'set' lines (no need to execute
  // by one thread, to avoid unnecessary extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (this is not quite ideal: the checking block has been closed by this
    // 'pop', but the atomic operation has not been executed yet; it happens
    // slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if master goes slow, other threads are blocked here waiting for the
// master to come and release them)
// AT: (it's not what a customer might expect specifying NOWAIT clause)
// AT: (specifying NOWAIT won't result in improvement of performance, it'll
// be confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other design)
// might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: do epcc test and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's
// used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events
    // so we set up the necessary frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // ( none of other workers will get to __kmpc_end_reduce_nowait() )
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
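
// A hedged sketch of the calling convention documented above, written as the
// code a compiler might emit for "reduction(+:sum) nowait". The reducer
// callback and the local/shared result variables are hypothetical and shown
// only for illustration; they are not part of this file.
#if 0 // illustrative sketch
static void example_add_reducer(void *lhs_data, void *rhs_data) {
  *(int *)lhs_data += *(int *)rhs_data; // combine partial results into lhs
}
static void example_reduce_nowait(ident_t *loc, kmp_int32 gtid,
                                  kmp_critical_name *crit, int *sum,
                                  int sum_local) {
  switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &sum_local,
                               example_add_reducer, crit)) {
  case 1: // this thread combines into the shared result, then ends the block
    *sum += sum_local;
    __kmpc_end_reduce_nowait(loc, gtid, crit);
    break;
  case 2: // atomic method: each thread updates the shared result atomically
    // an atomic add of sum_local into *sum would be emitted here
    break;
  case 0: // tree method, non-master: contribution already consumed
    break;
  }
}
#endif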

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( on Intel
    // platforms only )

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // it might be better to remove this else-if entirely; after removal this
    // value would be caught by the 'else' branch and assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // ( none of other workers except master will enter __kmpc_end_reduce() )
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
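
// A minimal sketch of the blocking variant's contract, assuming the same
// hypothetical reducer as in the nowait sketch above. The differences from
// the nowait form are that __kmpc_reduce() may block in a gathering barrier
// and that __kmpc_end_reduce() takes part in the terminating barrier.
#if 0 // illustrative sketch
static void example_reduce_blocking(ident_t *loc, kmp_int32 gtid,
                                    kmp_critical_name *crit, int *sum,
                                    int sum_local) {
  switch (__kmpc_reduce(loc, gtid, 1, sizeof(int), &sum_local,
                        example_add_reducer, crit)) {
  case 1: // this thread finishes the reduction and calls the end function
    *sum += sum_local;
    __kmpc_end_reduce(loc, gtid, crit);
    break;
  case 2: // atomic method: an atomic add of sum_local into *sum is emitted
    // here, then the end function executes the terminating barrier
    __kmpc_end_reduce(loc, gtid, crit);
    break;
  case 0: // other workers are gathered/released by the barrier machinery
    break;
  }
}
#endif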

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size==1, no synchronization is required (Intel platforms only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
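
// A small hedged example of how an instrumented test or tool might use the
// two queries above; the logging helper is hypothetical and not part of this
// file. Both calls return 0 when made from a thread unknown to the runtime.
#if 0 // illustrative sketch
#include <stdio.h>
static void example_log_current_task(void) {
  printf("task id %llu, parent id %llu\n",
         (unsigned long long)__kmpc_get_taskid(),
         (unsigned long long)__kmpc_get_parent_taskid());
}
#endif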

#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loops bounds.

Initialize doacross loop information.
The compiler is expected to send us inclusive bounds,
e.g. for (i = 2; i < 9; i += 2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Also save the address of num_done in order to access it later without
  // knowing the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by other loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
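
// A worked example of the private buffer built above, assuming the doubly
// nested loop "for (i = 0; i <= 7; ++i) for (j = 16; j >= 2; j -= 2)", i.e.
// inclusive bounds dims[0] = {0, 7, 1} and dims[1] = {16, 2, -2}:
//   th_doacross_info[0]    = 2                           // num_dims
//   th_doacross_info[1]    = &sh_buf->doacross_num_done
//   th_doacross_info[2..4] = 0, 7, 1                     // lo/up/st of dim 0
//   th_doacross_info[5]    = 8                           // range of dim 1
//   th_doacross_info[6..8] = 16, 2, -2                   // lo/up/st of dim 1
// trace_count = 8 * 8 = 64 iterations, so 64 flag bits are allocated.
#if 0 // illustrative sketch of the corresponding init call
  struct kmp_dim dims[2] = {{0, 7, 1}, {16, 2, -2}}; // assumed lo/up/st order
  __kmpc_doacross_init(loc, gtid, 2, dims);
#endif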

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
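
// Worked example of the flag indexing above: for linearized iteration 70,
// shft = 70 % 32 = 6 and the word index becomes 70 >> 5 = 2, so the waiter
// spins until bit 6 (flag = 0x40) of th_doacross_flags[2] is set by the
// matching __kmpc_doacross_post() call.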

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
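
// A hedged end-to-end sketch of how a compiler might lower a one-dimensional
// doacross loop onto the init/wait/post/fini entry points above; the source
// loop, its chunking, and the helper function are illustrative only.
#if 0 // illustrative sketch
// #pragma omp for ordered(1)
// for (i = 1; i < n; ++i) {
//   #pragma omp ordered depend(sink : i - 1)
//   a[i] = a[i - 1] + 1;
//   #pragma omp ordered depend(source)
// }
static void example_doacross(ident_t *loc, int gtid, kmp_int64 n, int *a) {
  struct kmp_dim dim = {1, n - 1, 1}; // inclusive bounds: lo=1, up=n-1, st=1
  __kmpc_doacross_init(loc, gtid, 1, &dim);
  for (kmp_int64 i = 1; i < n; ++i) { // each thread runs its assigned chunk
    kmp_int64 sink = i - 1;
    __kmpc_doacross_wait(loc, gtid, &sink); // block until i-1 is posted
    a[i] = a[i - 1] + 1;
    __kmpc_doacross_post(loc, gtid, &i); // publish completion of iteration i
  }
  __kmpc_doacross_fini(loc, gtid);
}
#endif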

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
#endif

#if OMP_50_ENABLED
/* omp_alloc/omp_free only defined for C/C++, not for Fortran */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
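
// A brief user-side sketch of these entry points through the OpenMP 5.0 API.
// It belongs in an application translation unit that includes <omp.h>;
// omp_default_mem_alloc is one of the predefined allocator handles.
#if 0 // illustrative sketch
#include <omp.h>
static void example_alloc_roundtrip(size_t n) {
  double *buf = (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
  if (buf) {
    buf[0] = 1.0; // ... use the memory ...
    omp_free(buf, omp_default_mem_alloc);
  }
}
#endif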

int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
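
// A hedged user-side sketch: applications normally reach this entry point
// through the OpenMP 5.0 pause API rather than calling it directly. The
// enumerator and function names below come from <omp.h> and are assumed here.
#if 0 // illustrative sketch
#include <omp.h>
static void example_pause_between_phases(void) {
  // Soft-pause the runtime between two parallel phases so worker threads can
  // release their resources; returns 0 on success, and the runtime resumes
  // automatically on the next parallel region.
  if (omp_pause_resource_all(omp_pause_soft) != 0) {
    // pause not supported or runtime not initialized
  }
}
#endif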
#endif // OMP_50_ENABLED

// end of file //