/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
  // root (which can cause library shutdown).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
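
As an illustrative sketch (where `loc` is assumed to be the compiler-emitted
ident_t for the call site), compilers typically query the gtid once and reuse
it:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
// gtid is then passed to later __kmpc_* entry points in the same function.
@endcode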
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
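
For illustration, a `num_threads` clause is typically lowered to a push
immediately before the fork, roughly as follows (a sketch; `loc`, `gtid` and
`outlined_fn` are assumed names, not part of this interface):
@code
// #pragma omp parallel num_threads(4)
__kmpc_push_num_threads(&loc, gtid, 4);
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
@endcode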
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to the callback routine containing the outlined
parallel construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
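
For illustration, a compiler might lower a simple parallel region to an
outlined microtask plus a fork call roughly like this (a sketch with assumed
names; not the literal output of any particular compiler):
@code
// #pragma omp parallel shared(a)
static void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *a) {
  // body of the parallel region; *gtid is the global thread number
}
// ... at the call site:
int a = 0;
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &a);
@endcode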
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
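
For illustration, the clauses are typically lowered to a push immediately
before the teams fork (a sketch; `loc`, `gtid` and `teams_outlined` are
assumed names):
@code
// #pragma omp teams num_teams(2) thread_limit(8)
__kmpc_push_num_teams(&loc, gtid, 2, 8);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined);
@endcode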
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to the callback routine containing the outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  __kmp_free(tmp);
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
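
For illustration, a flush directive is lowered to a single call (a sketch;
`loc` is the assumed compiler-emitted location descriptor):
@code
// #pragma omp flush
__kmpc_flush(&loc);
@endcode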
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
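
For illustration, an explicit barrier directive might be lowered as (a sketch
with an assumed `loc`):
@code
// #pragma omp barrier
__kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
@endcode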
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
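
For illustration, a master construct is typically lowered to a guarded call
pair (a sketch; `loc` and `gtid` are assumed names):
@code
// #pragma omp master
if (__kmpc_master(&loc, gtid)) {
  // master-only code
  __kmpc_end_master(&loc, gtid);
}
@endcode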
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
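
For illustration, the body of an ordered region is bracketed by the begin/end
calls (a sketch; `loc` and `gtid` are assumed names):
@code
// #pragma omp ordered
__kmpc_ordered(&loc, gtid);
// code that must execute in iteration order
__kmpc_end_ordered(&loc, gtid);
@endcode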
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }
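
// In plain terms, the acquire fast path above is: do a cheap relaxed load
// first, and only then attempt the acquiring compare-and-swap, retrying (with
// yield and backoff) until the swap from "free" to "busy" succeeds. The
// relaxed read keeps waiting threads from hammering the lock's cache line
// with read-modify-write traffic while the lock is held.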

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }
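
// Note: the low bit of the poll word acts as a "contended" flag. An acquirer
// that finds the lock busy sets that bit before sleeping in FUTEX_WAIT, and
// the releaser issues FUTEX_WAKE only when the bit was set, so an uncontended
// acquire/release pair never enters the kernel.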

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be reused
// for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK
1163
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // Since the critical directive binds to all threads, not just the current
  // team, we have to check this even if we are in a serialized team.
  // Also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}
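
/* Illustrative (hypothetical) compiler codegen for `#pragma omp critical`,
   sketched here for exposition only; a real compiler may differ in details
   such as how `loc` is materialized:

     static kmp_critical_name crit = {0}; // one zero-initialized name per
                                          // critical section in the source
     __kmpc_critical(&loc, global_tid, &crit);
     ... // body of the critical construct
     __kmpc_end_critical(&loc, global_tid, &crit);
*/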

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
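
// For example (an explanatory note, not in the original sources): on a TSX
// build, __kmp_map_hint_to_lock(omp_lock_hint_speculative) yields lockseq_hle,
// omp_lock_hint_contended yields lockseq_queuing, omp_lock_hint_uncontended
// yields lockseq_tas, and a conflicting combination such as
// (omp_lock_hint_contended | omp_lock_hint_uncontended) falls back to the
// default __kmp_user_lock_seq.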

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint
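
/* Illustrative (hypothetical) codegen for a critical construct with a hint,
   e.g. `#pragma omp critical (name) hint(omp_lock_hint_speculative)`; a
   sketch for exposition only:

     static kmp_critical_name crit_name = {0}; // hypothetical name variable
     __kmpc_critical_with_hint(&loc, global_tid, &crit_name,
                               omp_lock_hint_speculative);
     ... // body of the critical construct
     __kmpc_end_critical(&loc, global_tid, &crit_name);
*/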

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
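
/* Illustrative (hypothetical) usage of the pair above, sketched for
   exposition: every thread calls __kmpc_barrier_master(); only the thread for
   which it returns 1 runs the master block and then releases the others:

     if (__kmpc_barrier_master(&loc, global_tid)) {
       ... // master-only code
       __kmpc_end_barrier_master(&loc, global_tid);
     }
*/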

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the construct has nowait
semantics: the bookkeeping that an "end" call would otherwise perform is done
inside this function instead.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */
    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}
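
/* Illustrative (hypothetical) codegen sketch: with `nowait` the runtime call
   both executes the barrier and performs the master test, so the compiler
   needs no matching "end" call:

     if (__kmpc_barrier_master_nowait(&loc, global_tid)) {
       ... // master-only code, no trailing barrier
     }
*/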

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
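
/* Illustrative (hypothetical) codegen for `#pragma omp single`, sketched for
   exposition; note the explicit trailing barrier the compiler must add when
   `nowait` is absent, per the comment on __kmpc_single() above:

     if (__kmpc_single(&loc, global_tid)) {
       ... // body executed by exactly one thread
       __kmpc_end_single(&loc, global_tid);
     }
     __kmpc_barrier(&loc, global_tid); // omitted if `nowait` was specified
*/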

/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use the default set above;
        // a warning about this case is provided in __kmpc_for_static_init
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}

// User routines which take C-style arguments (call by value),
// different from the Fortran equivalent routines

void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* TO DO */
  /* we want per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}

#if OMP_50_ENABLED
/* OpenMP 5.0 Affinity Format API */

void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
#endif /* OMP_50_ENABLED */
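
/* Illustrative (hypothetical) use of the size-query convention shared by
   ompc_get_affinity_format() and ompc_capture_affinity() above: both return
   the number of characters required regardless of the buffer passed, so a
   caller can size a buffer in two passes. This sketch assumes, per the
   OpenMP 5.0 spec, that a NULL format selects the current affinity format:

     size_t n = ompc_capture_affinity(NULL, 0, NULL); // query required size
     char *buf = (char *)malloc(n + 1);               // +1 for the terminator
     ompc_capture_affinity(buf, n + 1, NULL);
     ...
     free(buf);
*/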

void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

  // Consider next barrier a user-visible barrier for barrier region boundaries
  // Nesting checks are already handled by the single construct checks

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
                                          // tasks can overwrite the location)
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
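
/* Illustrative (hypothetical) cpy_func for a copyprivate of a single int,
   where each thread passes the address of its private variable as cpy_data;
   a sketch for exposition only:

     static void copy_one_int(void *dst, void *src) {
       *(int *)dst = *(int *)src; // broadcast the single thread's value
     }

   Each thread would then call:

     __kmpc_copyprivate(&loc, gtid, sizeof(int), &my_priv, copy_one_int,
                        didit); // didit == 1 only on the single thread
*/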

/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                             \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}

/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}
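
/* Illustrative usage (not part of the runtime): the library's
   omp_init_lock_with_hint() entry forwards to __kmpc_init_lock_with_hint(),
   so a hinted lock in user code exercises this path. A minimal sketch;
   variable names are hypothetical.

     #include <omp.h>

     omp_lock_t lk;
     omp_init_lock_with_hint(&lk, omp_lock_hint_contended);
     omp_set_lock(&lk);
     // ... guarded work ...
     omp_unset_lock(&lk);
     omp_destroy_lock(&lk);

   The hint is advisory: __kmp_map_hint_to_lock() maps it to a lock sequence,
   and unsupported hints fall back to a default sequence. */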

/* initialize the nested lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock

/* initialize the nested lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock
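
/* Illustrative usage (not part of the runtime): omp_init_nest_lock() reaches
   this entry point. Depending on the size checks above, the lock either lives
   in place inside the user's omp_nest_lock_t (TAS/futex) or behind a pointer
   installed by __kmp_user_lock_allocate(). A minimal sketch:

     #include <omp.h>

     omp_nest_lock_t nlk;
     omp_init_nest_lock(&nlk); // -> __kmpc_init_nest_lock
     // ... omp_set_nest_lock / omp_unset_nest_lock pairs ...
     omp_destroy_nest_lock(&nlk);
*/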

/* destroy the lock */
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock
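
/* Illustrative lifecycle (not part of the runtime): omp_destroy_lock() must
   only be applied to an initialized, unlocked lock; with consistency checking
   enabled (__kmp_env_consistency_check), the *_with_checks layer diagnoses
   misuse. A minimal sketch:

     omp_lock_t lk;
     omp_init_lock(&lk);    // -> __kmpc_init_lock
     omp_set_lock(&lk);     // -> __kmpc_set_lock
     omp_unset_lock(&lk);   // -> __kmpc_unset_lock
     omp_destroy_lock(&lk); // -> __kmpc_destroy_lock
*/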

/* destroy the nested lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
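
/* A conceptual sketch (illustrative, hypothetical types) of the direct-lock
   dispatch used above: the low bits of the first lock word encode a tag that
   indexes a table of operations, so most acquires cost one indexed indirect
   call, and TAS/futex acquires can additionally be inlined via
   KMP_ACQUIRE_TAS_LOCK / KMP_ACQUIRE_FUTEX_LOCK when consistency checking is
   off.

     // typedef void (*lock_set_op_t)(kmp_dyna_lock_t *, kmp_int32);
     // lock_set_op_t table[] = { tas_set, futex_set, ... };
     // int tag = KMP_EXTRACT_D_TAG(user_lock);
     // table[tag]((kmp_dyna_lock_t *)user_lock, gtid);
*/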

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
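
/* Illustrative usage (not part of the runtime): a nestable lock may be
   re-acquired by the owning thread. The first acquire reports
   ompt_callback_mutex_acquired ("lock_first"); re-acquires report
   ompt_callback_nest_lock ("lock_next"), matching the branches above.

     #include <omp.h>

     omp_nest_lock_t nlk; // assume initialized elsewhere

     void update(int depth) {
       omp_set_nest_lock(&nlk); // -> __kmpc_set_nest_lock
       if (depth > 0)
         update(depth - 1); // owner re-enters without deadlocking
       omp_unset_nest_lock(&nlk);
     }
*/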

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_unset_lock:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      // lck is not set on this path; report the user lock pointer instead
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
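
/* The Linux/x86 fast path above releases a TAS lock with a plain store of 0
   to the poll word (TCW_4) followed by a full memory fence (KMP_MB). In
   portable C11 terms the effect is roughly the following sketch (hypothetical
   helper, not the runtime's actual code):

     #include <stdatomic.h>

     static void tas_release(atomic_int *poll) {
       atomic_store_explicit(poll, 0, memory_order_seq_cst);
     }

   which is at least as strong as the release ordering omp_unset_lock() needs
   to publish the critical section's writes. */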

/* release the nested lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_unset_nest_lock:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      // lck is not set on this path; report the user lock pointer instead
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
              codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
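
/* Illustrative usage (not part of the runtime): only the release that drops
   the nesting depth to zero reports KMP_LOCK_RELEASED (and
   ompt_callback_mutex_released); inner releases report
   ompt_callback_nest_lock with ompt_scope_end, matching the branches above.

     omp_set_nest_lock(&nlk);   // depth 1
     omp_set_nest_lock(&nlk);   // depth 2
     omp_unset_nest_lock(&nlk); // still held, depth 1
     omp_unset_nest_lock(&nlk); // released, depth 0
*/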

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
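
/* Illustrative usage (not part of the runtime): omp_test_lock() forwards here
   and returns nonzero only if the lock was acquired, so a caller can make
   progress instead of blocking. do_other_work() is a hypothetical helper.

     #include <omp.h>

     omp_lock_t lk; // assume initialized elsewhere

     while (!omp_test_lock(&lk)) { // -> __kmpc_test_lock
       do_other_work();
     }
     // lock held here
     omp_unset_lock(&lk);
*/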

/* try to acquire the nested lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
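
/* Illustrative usage (not part of the runtime): omp_test_nest_lock() returns
   the new nesting count on success (1 on first acquire, >1 on re-entry, which
   is why rc == 1 selects the "lock_first" callback above) and 0 on failure.

     int depth = omp_test_nest_lock(&nlk); // -> __kmpc_test_nest_lock
     if (depth) { // depth >= 1: acquired
       // ... work ...
       omp_unset_nest_lock(&nlk);
     }
*/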

// Interface to fast scalable reduce method routines

// Keep the selected method in a thread-local structure for cross-function
// use: it is read back in the __kmpc_end_reduce* functions.
// An alternative would be to re-determine the method in the
// __kmpc_end_reduce* functions (a new prototype would be required then).
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

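/* Sketch (illustrative) of how the two macros pair up across a reduce block:
   the method chosen on entry is stashed per thread and read back on exit, so
   entry and exit agree without re-running the selection logic.

     // in __kmpc_reduce_nowait(...):
     //   packed_reduction_method = __kmp_determine_reduction_method(...);
     //   __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
     // in __kmpc_end_reduce_nowait(...):
     //   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
*/
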
3238// used in a critical section reduce block
3239static __forceinline void
Jonathan Peyton30419822017-05-12 18:01:32 +00003240__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3241 kmp_critical_name *crit) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003242
Jonathan Peyton30419822017-05-12 18:01:32 +00003243 // this lock was visible to a customer and to the threading profile tool as a
3244 // serial overhead span (although it's used for an internal purpose only)
3245 // why was it visible in previous implementation?
3246 // should we keep it visible in new reduce block?
3247 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003248
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003249#if KMP_USE_DYNAMIC_LOCK
3250
Jonathan Peyton30419822017-05-12 18:01:32 +00003251 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3252 // Check if it is initialized.
3253 if (*lk == 0) {
3254 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3255 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3256 KMP_GET_D_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003257 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00003258 __kmp_init_indirect_csptr(crit, loc, global_tid,
3259 KMP_GET_I_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003260 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003261 }
3262 // Branch for accessing the actual lock object and set operation. This
3263 // branching is inevitable since this lock initialization does not follow the
3264 // normal dispatch path (lock table is not used).
3265 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3266 lck = (kmp_user_lock_p)lk;
3267 KMP_DEBUG_ASSERT(lck != NULL);
3268 if (__kmp_env_consistency_check) {
3269 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3270 }
3271 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3272 } else {
3273 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3274 lck = ilk->lock;
3275 KMP_DEBUG_ASSERT(lck != NULL);
3276 if (__kmp_env_consistency_check) {
3277 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3278 }
3279 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3280 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003281
3282#else // KMP_USE_DYNAMIC_LOCK
3283
Jonathan Peyton30419822017-05-12 18:01:32 +00003284 // We know that the fast reduction code is only emitted by Intel compilers
3285 // with 32 byte critical sections. If there isn't enough space, then we
3286 // have to use a pointer.
3287 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3288 lck = (kmp_user_lock_p)crit;
3289 } else {
3290 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3291 }
3292 KMP_DEBUG_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003293
Jonathan Peyton30419822017-05-12 18:01:32 +00003294 if (__kmp_env_consistency_check)
3295 __kmp_push_sync(global_tid, ct_critical, loc, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003296
Jonathan Peyton30419822017-05-12 18:01:32 +00003297 __kmp_acquire_user_lock_with_checks(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003298
3299#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00003300}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
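
/* How these helpers are used (illustrative): for the critical_reduce_block
   method, __kmpc_reduce*() calls the enter helper and returns 1 to every
   thread; the compiler-emitted code combines the thread-private partial value
   into the shared result and __kmpc_end_reduce*() calls the end helper, so
   the combine runs under the lazily initialized lock stored in *crit.

     // ret = __kmpc_reduce_nowait(...); // enters the critical section
     // if (ret == 1) {
     //   shared += private_copy;        // compiler-generated combine
     //   __kmpc_end_reduce_nowait(...); // leaves the critical section
     // }
*/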

#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside a teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is a reduction at the teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
#endif
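
// Illustrative sketch (not part of the runtime): the two helpers above are
// meant to bracket a reduction that executes inside a teams construct, and
// the reduction entry points below use them in exactly this pattern:
//
//   kmp_team_t *team;
//   int task_state;
//   int swapped =
//       __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
//   ... perform the reduction against the parent team ...
//   if (swapped)
//     __kmp_restore_swapped_teams(th, team, task_state);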

/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // the packed_reduction_method value will be reused by the __kmp_end_reduce*
  // function, so the value should be kept in a variable
  // the variable should be either a construct-specific or thread-specific
  // property, not a team-specific property
  // (a thread can reach the next reduce block on the next construct, and the
  // reduce method may differ on the next construct)
  // an ident_t "loc" parameter could be used as a construct-specific property
  // (what if loc == 0?)
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
  // a thread-specific variable is better regarding the two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes the 'determine' and 'set' lines (no need to execute
  // them on a single thread, which would only require extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop', but the atomic operation has not been executed yet; it will
    // be executed slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if the master goes slow, other threads are blocked here waiting for
// the master to come and release them)
// AT: (it's not what a customer might expect when specifying the NOWAIT
// clause)
// AT: (specifying NOWAIT won't result in improvement of performance; it'll
// be confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other
// design) might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: do an EPCC test and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's
// used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary frame/return
    // addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
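
/* Illustrative sketch (not part of the runtime): the call pattern a compiler
   might emit for `#pragma omp parallel for reduction(+ : sum) nowait`. The
   names `reduce_fn`, `crit`, `priv_sum` and `atomic_add` are hypothetical;
   real code generators may differ in detail.

   static void reduce_fn(void *lhs_data, void *rhs_data) {
     *(int *)lhs_data += *(int *)rhs_data; // combine two partial sums
   }

   static kmp_critical_name crit;  // zero-initialized lock placeholder
   int priv_sum = partial_result;  // this thread's private partial sum
   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &priv_sum,
                                reduce_fn, &crit)) {
   case 1: // this thread folds its value in (critical or tree method)
     sum += priv_sum;
     __kmpc_end_reduce_nowait(loc, gtid, &crit);
     break;
   case 2: // atomic method: every thread updates the result atomically
     atomic_add(&sum, priv_sum); // hypothetical atomic update
     break;
   default: // 0: this thread's contribution was already combined
     break;
   }
*/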

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it's better to remove this elseif at all;
    // after removal this value will be checked by the 'else' and will assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers except master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size == 1, no synchronization is required (Intel platforms
// only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
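
/* Illustrative sketch (not part of the runtime): the blocking variant follows
   the same pattern as the nowait sketch above, except that the terminating
   barrier is supplied by __kmpc_end_reduce(). The atomic branch of
   __kmpc_end_reduce() above performs a plain barrier, which suggests that for
   the blocking form the code generator also ends case 2 with that call; treat
   this, and all names below, as assumptions rather than a fixed contract.

   switch (__kmpc_reduce(loc, gtid, 1, sizeof(int), &priv_sum, reduce_fn,
                         &crit)) {
   case 1: // this thread folds its value in, then hits the barrier
     sum += priv_sum;
     __kmpc_end_reduce(loc, gtid, &crit);
     break;
   case 2: // atomic update, then the barrier in __kmpc_end_reduce()
     atomic_add(&sum, priv_sum); // hypothetical atomic update
     __kmpc_end_reduce(loc, gtid, &crit);
     break;
   default: // 0: contribution already combined; released by the master
     break;
   }
*/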

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
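
// Illustrative sketch (not part of the runtime): both queries above return 0
// when called from a thread the runtime does not know about, so they are safe
// to use for trace labels, e.g.:
//
//   printf("task %llu (parent %llu)\n",
//          (unsigned long long)__kmpc_get_taskid(),
//          (unsigned long long)__kmpc_get_parent_taskid());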

#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loops bounds.

Initialize doacross loop information.
Expect the compiler to send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Also save the address of num_done in order to access it later without
  // knowing the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count.
  // Start with the range of dims[0], which we don't need to keep in the
  // buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if the shared buffer is not occupied by another loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
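
/* Illustrative sketch (not part of the runtime): for the loop from the
   function comment above,

     #pragma omp for ordered(1)
     for (i = 2; i < 9; i += 2) { ... }

   a compiler might emit (variable names hypothetical):

     struct kmp_dim dims[1];
     dims[0].lo = 2; // inclusive lower bound
     dims[0].up = 8; // inclusive upper bound
     dims[0].st = 2; // stride
     __kmpc_doacross_init(loc, gtid, 1, dims);
*/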

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
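
// Worked example of the flag indexing above (a sketch, not runtime code):
// a flattened iteration number of 70 gives shft = 70 % 32 = 6, a word index
// of 70 >> 5 = 2, and flag = 1 << 6 = 0x40, so the loop spins until bit 6 of
// pr_buf->th_doacross_flags[2] is set by the matching __kmpc_doacross_post().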

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
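
/* Illustrative sketch (not part of the runtime): the per-thread call sequence
   a compiler might emit for an ordered(1) doacross loop with a dependence on
   iteration i - 2, i.e. depend(sink: i - 2) / depend(source). Names are
   hypothetical:

     __kmpc_doacross_init(loc, gtid, 1, dims);
     for (each iteration i assigned to this thread) {
       kmp_int64 vec[1];
       vec[0] = i - 2;                      // sink: wait for iteration i - 2
       __kmpc_doacross_wait(loc, gtid, vec);
       ... loop body ...
       vec[0] = i;                          // source: publish iteration i
       __kmpc_doacross_post(loc, gtid, vec);
     }
     __kmpc_doacross_fini(loc, gtid);
*/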
4214#endif
4215
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004216#if OMP_50_ENABLED
Jonathan Peytonebf18302019-04-08 17:59:28 +00004217/* omp_alloc/omp_free only defined for C/C++, not for Fortran */
4218void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4219 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4220}
4221
4222void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4223 __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4224}
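
// Illustrative sketch (not part of the runtime): typical user-level usage of
// the two entry points above, with the predefined OpenMP 5.0 default
// allocator:
//
//   int *p = (int *)omp_alloc(64 * sizeof(int), omp_default_mem_alloc);
//   if (p != NULL) {
//     ... use p ...
//     omp_free(p, omp_default_mem_alloc);
//   }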

int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
#endif // OMP_50_ENABLED

// end of file //