/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}
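
// Illustrative sketch (not part of the runtime): a compiler targeting this
// interface may bracket the user's main() with __kmpc_begin/__kmpc_end. The
// ident_t literal below is a hypothetical example of the source-location
// descriptor such a compiler would emit; field layout follows ident_t in
// kmp.h (reserved_1, flags, reserved_2, reserved_3, psource).
//
//   static ident_t loc_main = {0, KMP_IDENT_KMPC, 0, 0, ";main.c;main;1;1;;"};
//   int main(void) {
//     __kmpc_begin(&loc_main, 0); // optional; lazy init happens anyway
//     /* ... user code, possibly containing parallel regions ... */
//     __kmpc_end(&loc_main);
//     return 0;
//   }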

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
  // root (it can cause library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}
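
// Illustrative sketch (hedged): tools or foreign (non-OpenMP) threads can use
// this entry to obtain a runtime-wide thread index, e.g. to key per-thread
// logging. Passing NULL for the location descriptor is an assumption made
// here for brevity; a compiler would normally pass a real ident_t.
//
//   kmp_int32 my_gtid = __kmpc_global_thread_num(NULL);
//   printf("registered with the OpenMP runtime as gtid %d\n", my_gtid);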

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), so this call counts all such threads even
if they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}
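
// Illustrative lowering sketch (hedged): for a directive such as
//   #pragma omp parallel num_threads(4)
// a compiler typically emits the push immediately before the fork, e.g.:
//
//   __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
//   __kmpc_fork_call(&loc, /*argc=*/1, outlined_body, &shared_var);
//
// The names loc, outlined_body, and shared_var are hypothetical placeholders.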

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
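
// Illustrative lowering sketch (hedged): a compiler outlines the body of
//   #pragma omp parallel
//   { c[i] = a[i] + b[i]; }
// into a microtask whose first two parameters are the global and bound thread
// ids, with the shared variables passed through the ellipsis:
//
//   void outlined_body(kmp_int32 *gtid, kmp_int32 *btid, float *a, float *b,
//                      float *c) {
//     /* ... loop body, partitioned by worksharing calls ... */
//   }
//   __kmpc_fork_call(&loc, 3, (kmpc_micro)outlined_body, a, b, c);
//
// outlined_body and the array names are hypothetical; argc counts only the
// ellipsis arguments.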

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
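
// Illustrative lowering sketch (hedged): for
//   #pragma omp teams num_teams(8) thread_limit(64)
//   { team_body(); }
// a compiler would push the clause values and then fork the teams microtask:
//
//   __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/8, /*num_threads=*/64);
//   __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_body);
//
// outlined_teams_body is a hypothetical name for the outlined region.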

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}
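
// Illustrative lowering sketch (hedged): for a conditional region
//   #pragma omp parallel if (n > threshold)
// the compiler can branch between the forked and serialized paths:
//
//   if (n > threshold) {
//     __kmpc_fork_call(&loc, 1, outlined_body, &n);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     kmp_int32 zero = 0;
//     outlined_body(&gtid, &zero, &n); // run the region on this thread
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }
//
// outlined_body, n, and threshold are hypothetical placeholders.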

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction also needs to be addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is an SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here; move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
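
// Illustrative lowering sketch (hedged): a standalone
//   #pragma omp flush
// typically lowers to a single runtime call, with the location descriptor as
// the only argument:
//
//   __kmpc_flush(&loc);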

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
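
// Illustrative lowering sketch (hedged): an explicit
//   #pragma omp barrier
// lowers to
//
//   __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
//
// Compilers usually cache the gtid from an earlier call rather than
// re-querying it at every construct.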

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
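
// Illustrative lowering sketch (hedged): the guard pattern for
//   #pragma omp master
//   { body; }
// is a conditional around the body, with no implied barrier:
//
//   if (__kmpc_master(&loc, gtid)) {
//     body();
//     __kmpc_end_master(&loc, gtid);
//   }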

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
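
// Illustrative lowering sketch (hedged): inside a loop compiled with
//   #pragma omp for ordered
// each iteration's ordered block is bracketed by these calls:
//
//   for (/* iterations assigned by the dispatcher */;;) {
//     __kmpc_ordered(&loc, gtid);
//     /* ... code that must run in iteration order ... */
//     __kmpc_end_ordered(&loc, gtid);
//   }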

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

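// Usage note (hedged sketch): these macros implement a test-and-set lock via
// an acquire CAS on lk.poll (free value -> an encoding of gtid+1) and a
// release store back to the free value, e.g.:
//
//   int acquired;
//   KMP_TEST_TAS_LOCK(lck, gtid, acquired); // try once, no spinning
//   if (!acquired)
//     KMP_ACQUIRE_TAS_LOCK(lck, gtid); // spin with yield/backoff
//   /* ... critical section ... */
//   KMP_RELEASE_TAS_LOCK(lck, gtid);
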
Jonathan Peytondae13d82015-12-11 21:57:06 +00001006#if KMP_USE_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001007
Jonathan Peyton30419822017-05-12 18:01:32 +00001008#include <sys/syscall.h>
1009#include <unistd.h>
1010#ifndef FUTEX_WAIT
1011#define FUTEX_WAIT 0
1012#endif
1013#ifndef FUTEX_WAKE
1014#define FUTEX_WAKE 1
1015#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001016
1017// Fast-path acquire futex lock
Jonathan Peyton30419822017-05-12 18:01:32 +00001018#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1019 { \
1020 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1021 kmp_int32 gtid_code = (gtid + 1) << 1; \
1022 KMP_MB(); \
1023 KMP_FSYNC_PREPARE(ftx); \
1024 kmp_int32 poll_val; \
1025 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1026 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1027 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1028 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1029 if (!cond) { \
1030 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1031 poll_val | \
1032 KMP_LOCK_BUSY(1, futex))) { \
1033 continue; \
1034 } \
1035 poll_val |= KMP_LOCK_BUSY(1, futex); \
1036 } \
1037 kmp_int32 rc; \
1038 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1039 NULL, NULL, 0)) != 0) { \
1040 continue; \
1041 } \
1042 gtid_code |= 1; \
1043 } \
1044 KMP_FSYNC_ACQUIRED(ftx); \
1045 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001046
1047// Fast-path test futex lock
Jonathan Peyton30419822017-05-12 18:01:32 +00001048#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1049 { \
1050 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1051 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1052 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1053 KMP_FSYNC_ACQUIRED(ftx); \
1054 rc = TRUE; \
1055 } else { \
1056 rc = FALSE; \
1057 } \
1058 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001059
1060// Fast-path release futex lock
Jonathan Peyton30419822017-05-12 18:01:32 +00001061#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1062 { \
1063 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1064 KMP_MB(); \
1065 KMP_FSYNC_RELEASING(ftx); \
1066 kmp_int32 poll_val = \
1067 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1068 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1069 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1070 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1071 } \
1072 KMP_MB(); \
Jonathan Peytone47d32f2019-02-28 19:11:29 +00001073 KMP_YIELD_OVERSUB(); \
Jonathan Peyton30419822017-05-12 18:01:32 +00001074 }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage of
// the underlying lock. This is the only place where we can guarantee that.
// There is a chance the lock will be destroyed without ever being used, but
// that is not a problem: this is not a real event seen by the user, merely a
// way of setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}
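
// The allocate-then-CAS-publish pattern above, reduced to its essentials
// (illustrative sketch only; the type and helper names here are hypothetical):
#if 0
static example_lock_t *get_or_publish(example_lock_t **slot) {
  example_lock_t *lk = (example_lock_t *)TCR_PTR(*slot); // racy first check
  if (lk == NULL) {
    lk = example_alloc_lock(); // every late-comer allocates tentatively...
    if (!KMP_COMPARE_AND_STORE_PTR(slot, 0, lk)) { // ...but only one wins
      example_free_lock(lk); // the loser discards its copy...
      lk = (example_lock_t *)TCR_PTR(*slot); // ...and adopts the winner's
    }
  }
  return lk;
}
#endif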

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

// Since the critical directive binds to all threads, not just the current
// team, we have to check this even if we are in a serialized team.
// Also, even if we are the uber thread, we still have to conduct the lock,
// as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' is suitable for use as the critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}
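
// What a compiler typically emits for `#pragma omp critical` (hedged sketch;
// the exact calling sequence and the name of the shared lock word are
// compiler-dependent):
#if 0
static kmp_critical_name crit_name; // zero-initialized, shared by all threads
static void outlined_critical_region(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &crit_name); // blocks until the lock is held
  /* ... user code inside the critical construct ... */
  __kmpc_end_critical(loc, gtid, &crit_name); // releases the lock
}
#endif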

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
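
// User-level view of the mapping above: the hint flows in through the
// OpenMP 4.5 lock-with-hint API (hedged sketch; whether a speculative lock
// is actually chosen depends on TSX support, per __kmp_map_hint_to_lock):
#if 0
#include <omp.h>
static void example_hinted_lock(void) {
  omp_lock_t l;
  // An uncontended, non-speculative hint maps to a test-and-set lock here.
  omp_init_lock_with_hint(&l, omp_lock_hint_uncontended);
  omp_set_lock(&l);
  /* ... short critical work ... */
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
}
#endif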

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
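
// Tool-side view (hedged sketch): the impl value computed above is delivered
// as the third argument of the OMPT mutex-acquire callback. The callback
// signature below follows the OMPT interface in omp-tools.h as we understand
// it; a real tool would register it via ompt_set_callback from its
// ompt_start_tool initializer.
#if 0
#include <omp-tools.h>
#include <stdio.h>
static void on_mutex_acquire(ompt_mutex_t kind, unsigned int hint,
                             unsigned int impl, ompt_wait_id_t wait_id,
                             const void *codeptr_ra) {
  if (kind == ompt_mutex_critical)
    printf("critical acquire: hint=%u impl=%u\n", hint, impl);
}
#endif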

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical_with_hint: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical_with_hint: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint
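
// Hedged sketch of the corresponding compiler lowering for a hinted critical
// construct (OpenMP spells the clause hint(omp_sync_hint_*); the exact
// codegen is compiler-dependent):
#if 0
static kmp_critical_name hinted_crit; // shared, zero-initialized
static void outlined_hinted_region(ident_t *loc, kmp_int32 gtid) {
  // #pragma omp critical(name) hint(omp_sync_hint_contended)
  __kmpc_critical_with_hint(loc, gtid, &hinted_crit, omp_lock_hint_contended);
  /* ... user code ... */
  __kmpc_end_critical(loc, gtid, &hinted_crit);
}
#endif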

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' is suitable for use as the critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}
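
// Hedged sketch of how a compiler can use the paired entry points above to
// lower a combined barrier+master region (codegen details vary by compiler):
#if 0
static void outlined_barrier_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_barrier_master(loc, gtid)) { // all threads meet at the barrier
    /* ... master-only code ... */
    __kmpc_end_barrier_master(loc, gtid); // releases the waiting threads
  }
  // With __kmpc_barrier_master_nowait there is no end call: non-master
  // threads continue immediately after the barrier.
}
#endif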

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the other threads do not wait
at the end of the construct; there is nothing for the master thread to release.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}
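
// Hedged sketch of the usual lowering of `#pragma omp single` (the compiler
// emits the explicit barrier unless nowait is present, matching the note in
// the doc comment above; exact codegen is compiler-dependent):
#if 0
static void outlined_single(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) { // exactly one thread wins
    /* ... user code in the single block ... */
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // omitted when the nowait clause is used
}
#endif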

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use the default set above;
        // a warning about this case is provided in __kmpc_for_static_init
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}

// User routines which take C-style arguments (call by value)
// different from the Fortran equivalent routines

void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* TODO: we want a per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
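
// Hedged usage sketch for the two query entry points above, via their
// user-facing omp_* equivalents (level 0 is the outermost, implicit level;
// behavior with nesting disabled differs):
#if 0
#include <omp.h>
#include <stdio.h>
static void example_nesting_queries(void) {
  omp_set_max_active_levels(2); // allow one level of nested parallelism
  #pragma omp parallel num_threads(2)
  #pragma omp parallel num_threads(3)
  {
    // Thread number within the level-1 (outer) team, and that team's size.
    printf("outer tid=%d of %d\n", omp_get_ancestor_thread_num(1),
           omp_get_team_size(1));
  }
}
#endif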

/* OpenMP 5.0 Affinity Format API */

void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}

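// Hedged usage sketch of the affinity-format API via the omp_* entry points
// (the %n/%A field names follow our reading of the OpenMP 5.0 spec). Note
// the size-query idiom: capture returns the required length, so a NULL
// buffer can be used to size first, as the implementation above permits.
#if 0
#include <omp.h>
#include <stdlib.h>
static void example_affinity_format(void) {
  omp_set_affinity_format("thread %n bound to %A");
  size_t need = omp_capture_affinity(NULL, 0, NULL); // query required size
  char *buf = (char *)malloc(need + 1);
  omp_capture_affinity(buf, need + 1, NULL); // NULL format = the stored one
  omp_display_affinity(NULL); // prints using the stored format
  free(buf);
}
#endif
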
void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if this is needed
  // (e.g. tasks can overwrite the location)
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
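
// Hedged sketch of how a compiler can lower `#pragma omp single
// copyprivate(x)` onto the routine above (variable and helper names are
// illustrative; real codegen differs in detail). Each thread passes a
// pointer to its own private copy as cpy_data:
#if 0
static void copy_x(void *dst, void *src) { // the cpy_func helper
  *(int *)dst = *(int *)src;
}
static void outlined_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 didit = 0;
  if (__kmpc_single(loc, gtid)) {
    *x = 42; // the single thread produces the value
    __kmpc_end_single(loc, gtid);
    didit = 1;
  }
  // No explicit barrier afterwards: the internal barriers do the broadcast.
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, copy_x, didit);
}
#endif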

/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}
2178
2179/* initialize the lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00002180void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2181 uintptr_t hint) {
2182 KMP_DEBUG_ASSERT(__kmp_init_serial);
2183 if (__kmp_env_consistency_check && user_lock == NULL) {
2184 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2185 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002186
Jonathan Peyton30419822017-05-12 18:01:32 +00002187 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Joachim Protze82e94a52017-11-01 10:08:30 +00002188
2189#if OMPT_SUPPORT && OMPT_OPTIONAL
2190 // This is the case, if called from omp_init_lock_with_hint:
2191 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2192 if (!codeptr)
2193 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2194 if (ompt_enabled.ompt_callback_lock_init) {
2195 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2196 ompt_mutex_lock, (omp_lock_hint_t)hint,
Joachim Protze4109d562019-05-20 14:21:42 +00002197 __ompt_get_mutex_impl_type(user_lock),
2198 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002199 }
2200#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002201}
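// Editorial note (illustrative): user code reaches the entry point above
// through the OpenMP 4.5 omp.h API; hints are advisory, so an unsupported
// hint simply maps back to the default lock kind. Sketch of a typical caller:
/*
  #include <omp.h>
  omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
  omp_set_lock(&l);
  // ... critical work ...
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
*/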

/* initialize the nested lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
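// Editorial note (illustrative): compilers and the omp_* entry points lower
// the user-level lock API onto these __kmpc_* functions, passing source
// location info and the caller's global thread id. Roughly, for the function
// above:
/*
  omp_lock_t l;
  omp_init_lock(&l);
  // is serviced by something like:
  //   __kmpc_init_lock(&loc_descriptor, gtid, (void **)&l);
  // where loc_descriptor and gtid are supplied by the caller's wrapper.
*/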

/* initialize the nested lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock
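// Editorial note (illustrative): with KMP_USE_DYNAMIC_LOCK the word stored in
// the user's lock variable is self-describing, which is what the destroy path
// above relies on. Direct locks (TAS, futex, ...) keep a nonzero tag in the
// low bits of the word itself; indirect locks store a handle whose extracted
// tag is 0 and must be resolved through the indirect-lock table:
/*
  if (KMP_EXTRACT_D_TAG(user_lock) != 0) {
    // direct lock: operate on the lock word in place
  } else {
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
    // indirect lock: operate on ilk->lock
  }
*/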

/* destroy the nested lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
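// Editorial note (illustrative): with KMP_USE_INLINED_TAS the uncontended
// acquire above is inlined instead of dispatched through __kmp_direct_set.
// Ignoring the runtime's exact poll-word encoding and backoff policy, a
// test-and-set acquire is essentially this C11 sketch (all names invented):
/*
  #include <stdatomic.h>
  static void tas_acquire(atomic_int *poll, int locked_value) {
    int expected = 0;
    while (!atomic_compare_exchange_weak_explicit(
        poll, &expected, locked_value, memory_order_acquire,
        memory_order_relaxed)) {
      expected = 0; // lost the race: reset and retry until the owner stores 0
    }
  }
*/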

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
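// Editorial note: for nested locks the acquire reports whether this was the
// owning thread's first acquisition (KMP_LOCK_ACQUIRED_FIRST) or a
// re-acquisition that only bumped the nesting depth; the two cases map to the
// OMPT mutex_acquired ("lock_first") and nest_lock ("lock_next") callbacks
// dispatched above.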

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
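// Editorial note: on the "fast" unset path above, releasing a TAS lock is a
// plain volatile store of 0 to the poll word (TCW_4) followed by a full
// memory fence (KMP_MB); the next acquirer's compare-and-swap on the same
// word observes the 0 and takes ownership.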

/* release the nested lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
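// Editorial note (illustrative): nested-lock releases decrement the nesting
// depth and hand the lock off only at depth zero, which is exactly the
// KMP_LOCK_RELEASED vs. KMP_LOCK_STILL_HELD distinction reported to OMPT
// above. User-level view:
/*
  omp_nest_lock_t l;
  omp_init_nest_lock(&l);
  omp_set_nest_lock(&l);   // depth 1 (lock_first)
  omp_set_nest_lock(&l);   // same thread, depth 2 (lock_next)
  omp_unset_nest_lock(&l); // depth 1, lock still held
  omp_unset_nest_lock(&l); // depth 0, lock released
  omp_destroy_nest_lock(&l);
*/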

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
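// Editorial note (illustrative): omp_test_lock maps onto the entry point
// above and returns nonzero only if the lock was acquired, so callers can
// poll instead of blocking:
/*
  if (omp_test_lock(&l)) {
    // ... lock held: do the protected work ...
    omp_unset_lock(&l);
  } else {
    // lock busy: do other useful work instead of waiting
  }
*/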

/* try to acquire the nested lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
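// Editorial note: on success omp_test_nest_lock returns the new nesting
// count (1 on a first acquisition, greater than 1 when the owner
// re-acquires) and 0 on failure, which is why rc == 1 selects the
// "lock_first" OMPT callback above while larger values report "lock_next".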

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid) \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h
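// Editorial note (illustrative): the setter runs in __kmpc_reduce* once a
// method has been chosen, and the matching __kmpc_end_reduce* call reads it
// back instead of re-running the selection logic:
/*
  PACKED_REDUCTION_METHOD_T m =
      __kmp_determine_reduction_method(loc, global_tid, num_vars, reduce_size,
                                       reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, m);
  // ... later, in __kmpc_end_reduce*():
  m = __KMP_GET_REDUCTION_METHOD(global_tid);
*/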

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block

static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Are we inside the teams construct?
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}

/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction is needed

The nowait version is used for a reduce clause with the nowait argument.
*/
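// A usage sketch (illustrative only, not part of the library): for a
// directive such as "#pragma omp for reduction(+ : sum) nowait", a compiler
// typically emits calls along these lines; the reducer and the variable
// names below are hypothetical.
//
//   static kmp_critical_name crit = {0};
//   static void reducer(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; }
//   ...
//   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &priv_sum,
//                                reducer, &crit)) {
//   case 1: // this thread owns the combined result and must close the region
//     sum += priv_sum;
//     __kmpc_end_reduce_nowait(loc, gtid, &crit);
//     break;
//   case 2: // atomic path: each thread updates the shared variable atomically
//     /* atomic add of priv_sum into sum */
//     break;
//   default: // 0: this thread's contribution was already consumed
//     break;
//   }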
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // why do we need this initialization here at all?
  // A reduction clause cannot be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of a false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // The packed_reduction_method value will be reused by the __kmp_end_reduce*
  // function, so it has to be kept in a variable. The variable should be a
  // construct-specific or thread-specific property, not a team-specific one:
  // a thread can reach the next reduce block on the next construct, and the
  // reduce method may differ on that construct. The ident_t "loc" parameter
  // could serve as a construct-specific property, but what if loc == 0? And if
  // construct-specific and team-specific variables were shared, unnecessary
  // extra synchronization would be needed. A thread-specific variable avoids
  // both issues (next construct and extra syncs), so a thread-specific
  // "th_local.reduction_method" variable is used currently. Each thread
  // executes the 'determine' and 'set' lines itself; there is no need to run
  // them on one thread only, which would again require unnecessary syncs.

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // All threads should do this pop here, because __kmpc_end_reduce_nowait()
    // won't be called by the code gen. (This is not quite clean: the checking
    // block has been closed by this 'pop', but the atomic operation has not
    // been executed yet; it will run slightly later, literally on the next
    // instruction.)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if master goes slow, other threads are blocked here waiting for the
// master to come and release them)
// AT: (it's not what a customer might expect when specifying the NOWAIT clause)
// AT: (specifying NOWAIT won't result in improvement of performance; it'll
// be confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other design)
// might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: do epcc test and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's
// used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary frame/return
    // addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it's better to remove this else-if entirely;
    // after removal this value will be checked by the 'else' and will assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here
    // OMPT: tree reduction is annotated in the barrier code

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction is needed

A blocking reduce that includes an implicit barrier.
*/
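// A usage sketch (illustrative only): same pattern as the nowait variant
// above, except that every thread that received 1 or 2 must also call
// __kmpc_end_reduce(), which supplies the terminating barrier.
//
//   int ret = __kmpc_reduce(loc, gtid, 1, sizeof(int), &priv_sum, reducer,
//                           &crit);
//   if (ret == 1) {
//     sum += priv_sum; // combine, then close the region
//     __kmpc_end_reduce(loc, gtid, &crit);
//   } else if (ret == 2) {
//     /* atomic add of priv_sum into sum */
//     __kmpc_end_reduce(loc, gtid, &crit);
//   }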
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // why do we need this initialization here at all?
  // A reduction clause cannot be a stand-alone directive.

  // do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of a false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except master should do this pop here
    // (none of the workers except master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

    OMPT_REDUCTION_END;

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_END;

// usage: if team size==1, no synchronization is required (Intel platforms only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loops bounds.

Initialize doacross loop information.
Expect the compiler to send us inclusive bounds,
e.g. for (i = 2; i < 9; i += 2) lo=2, up=8, st=2.
*/
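// A usage sketch (illustrative only): for "#pragma omp for ordered(1)" on the
// loop above, for (i = 2; i < 9; i += 2), a compiler would emit roughly:
//
//   struct kmp_dim dim = {/*lo=*/2, /*up=*/8, /*st=*/2};
//   __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dim);
//   // ... for each iteration i assigned to this thread:
//   kmp_int64 vec = i - 2; // "ordered depend(sink : i - 2)"
//   __kmpc_doacross_wait(loc, gtid, &vec);
//   // ... loop body ...
//   vec = i; // "ordered depend(source)"
//   __kmpc_doacross_post(loc, gtid, &vec);
//   // ... after the loop:
//   __kmpc_doacross_fini(loc, gtid);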
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check that the shared buffer is not occupied by another loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch the shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
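  // Worked example of the mapping above (illustrative): for a two-dimensional
  // nest with kept inner range ln = 4, the collapsed number of iteration
  // (outer index 2, inner index 3) is 3 + 4 * 2 = 11, so this call spins on
  // bit 11 % 32 = 11 of flags word 11 / 32 = 0.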
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
4171
Jonathan Peyton30419822017-05-12 18:01:32 +00004172void __kmpc_doacross_fini(ident_t *loc, int gtid) {
AndreyChurbanov787eb0c2020-07-20 23:49:58 +03004173 __kmp_assert_valid_gtid(gtid);
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004174 kmp_int32 num_done;
Jonathan Peyton30419822017-05-12 18:01:32 +00004175 kmp_info_t *th = __kmp_threads[gtid];
4176 kmp_team_t *team = th->th.th_team;
4177 kmp_disp_t *pr_buf = th->th.th_dispatch;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004178
Jonathan Peyton30419822017-05-12 18:01:32 +00004179 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4180 if (team->t.t_serialized) {
4181 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4182 return; // nothing to do
4183 }
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004184 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004185 if (num_done == th->th.th_team_nproc) {
4186 // we are the last thread, need to free shared resources
4187 int idx = pr_buf->th_doacross_buf_idx - 1;
4188 dispatch_shared_info_t *sh_buf =
4189 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4190 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4191 (kmp_int64)&sh_buf->doacross_num_done);
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004192 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
Jonathan Peyton30419822017-05-12 18:01:32 +00004193 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004194 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
Jonathan Peyton30419822017-05-12 18:01:32 +00004195 sh_buf->doacross_flags = NULL;
4196 sh_buf->doacross_num_done = 0;
4197 sh_buf->doacross_buf_idx +=
4198 __kmp_dispatch_num_buffers; // free buffer for future re-use
4199 }
4200 // free private resources (need to keep buffer index forever)
Jonathan Peyton369d72d2018-07-30 17:48:33 +00004201 pr_buf->th_doacross_flags = NULL;
Jonathan Peyton30419822017-05-12 18:01:32 +00004202 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4203 pr_buf->th_doacross_info = NULL;
4204 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
Jonathan Peyton71909c52016-03-02 22:42:06 +00004205}

/* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
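
// A usage sketch (illustrative only): these entry points back the OpenMP 5.x
// allocator API for C/C++ programs, e.g. with a predefined allocator:
//
//   double *buf = (double *)omp_alloc(1024 * sizeof(double),
//                                     omp_high_bw_mem_alloc);
//   ... use buf ...
//   omp_free(buf, omp_high_bw_mem_alloc);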

int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}