blob: 455cbe28e1bdfa739a35ee4d9e6f99752ef7ccea [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
Jonathan Peytonde4749b2016-12-14 23:01:24 +00002 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
Jim Cownie5e8470a2013-09-27 10:38:44 +00005//===----------------------------------------------------------------------===//
6//
Chandler Carruth57b08b02019-01-19 10:56:40 +00007// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Jim Cownie5e8470a2013-09-27 10:38:44 +000010//
11//===----------------------------------------------------------------------===//
12
Jonathan Peyton92ca6182018-09-07 18:25:49 +000013#define __KMP_IMP
Jonathan Peyton30419822017-05-12 18:01:32 +000014#include "omp.h" /* extern "C" declarations of user-visible routines */
Jim Cownie5e8470a2013-09-27 10:38:44 +000015#include "kmp.h"
Jonathan Peyton30419822017-05-12 18:01:32 +000016#include "kmp_error.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000017#include "kmp_i18n.h"
18#include "kmp_itt.h"
Paul Osmialowskifb043fd2016-05-16 09:44:11 +000019#include "kmp_lock.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000020#include "kmp_stats.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000021
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000022#if OMPT_SUPPORT
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000023#include "ompt-specific.h"
24#endif
25
Jim Cownie5e8470a2013-09-27 10:38:44 +000026#define MAX_MESSAGE 512
27
Jonathan Peyton30419822017-05-12 18:01:32 +000028// flags will be used in future, e.g. to implement openmp_strict library
29// restrictions
Jim Cownie5e8470a2013-09-27 10:38:44 +000030
31/*!
32 * @ingroup STARTUP_SHUTDOWN
33 * @param loc in source location information
34 * @param flags in for future use (currently ignored)
35 *
36 * Initialize the runtime library. This call is optional; if it is not made then
Jim Cownie4cc4bb42014-10-07 16:25:50 +000037 * it will be implicitly called by attempts to use other library functions.
Jim Cownie5e8470a2013-09-27 10:38:44 +000038 */
Jonathan Peyton30419822017-05-12 18:01:32 +000039void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
40 // By default __kmpc_begin() is no-op.
41 char *env;
42 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
43 __kmp_str_match_true(env)) {
44 __kmp_middle_initialize();
45 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
46 } else if (__kmp_ignore_mppbeg() == FALSE) {
47 // By default __kmp_ignore_mppbeg() returns TRUE.
48 __kmp_internal_begin();
49 KC_TRACE(10, ("__kmpc_begin: called\n"));
50 }
Jim Cownie5e8470a2013-09-27 10:38:44 +000051}
52
53/*!
54 * @ingroup STARTUP_SHUTDOWN
55 * @param loc source location information
56 *
Jonathan Peyton30419822017-05-12 18:01:32 +000057 * Shutdown the runtime library. This is also optional, and even if called will
58 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
59 * zero.
60 */
// Shut down the runtime at program end. NOTE: loc is unused; the parameter
// is kept for ABI compatibility with compiler-generated calls.
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    // -1 means "the current thread" (gtid is looked up internally).
    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}
82
83/*!
84@ingroup THREAD_STATES
85@param loc Source location information.
86@return The global thread index of the active thread.
87
88This function can be called in any context.
89
90If the runtime has only been entered at the outermost level from a
Jonathan Peyton30419822017-05-12 18:01:32 +000091single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
92that which would be returned by omp_get_thread_num() in the outermost
Jim Cownie5e8470a2013-09-27 10:38:44 +000093active parallel construct. (Or zero if there is no active parallel
94construct, since the master thread is necessarily thread zero).
95
96If multiple non-OpenMP threads all enter an OpenMP construct then this
97will be a unique thread identifier among all the threads created by
98the OpenMP runtime (but the value cannot be defined in terms of
99OpenMP thread ids returned by omp_get_thread_num()).
Jim Cownie5e8470a2013-09-27 10:38:44 +0000100*/
Jonathan Peyton30419822017-05-12 18:01:32 +0000101kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
102 kmp_int32 gtid = __kmp_entry_gtid();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000103
Jonathan Peyton30419822017-05-12 18:01:32 +0000104 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000105
Jonathan Peyton30419822017-05-12 18:01:32 +0000106 return gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000107}
108
109/*!
110@ingroup THREAD_STATES
111@param loc Source location information.
112@return The number of threads under control of the OpenMP<sup>*</sup> runtime
113
114This function can be called in any context.
Jonathan Peyton30419822017-05-12 18:01:32 +0000115It returns the total number of threads under the control of the OpenMP runtime.
116That is not a number that can be determined by any OpenMP standard calls, since
117the library may be called from more than one non-OpenMP thread, and this
118reflects the total over all such calls. Similarly the runtime maintains
119underlying threads even when they are not active (since the cost of creating
120and destroying OS threads is high), this call counts all such threads even if
121they are not waiting for work.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000122*/
// Return the total number of threads under control of the runtime, across
// all roots — including currently idle pool threads.
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  // TCR_4 performs a 4-byte "thread-consistent read" of the shared counter.
  return TCR_4(__kmp_all_nth);
}
129
130/*!
131@ingroup THREAD_STATES
132@param loc Source location information.
Jonathan Peyton30419822017-05-12 18:01:32 +0000133@return The thread number of the calling thread in the innermost active parallel
134construct.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000135*/
Jonathan Peyton30419822017-05-12 18:01:32 +0000136kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
137 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
138 return __kmp_tid_from_gtid(__kmp_entry_gtid());
Jim Cownie5e8470a2013-09-27 10:38:44 +0000139}
140
141/*!
142@ingroup THREAD_STATES
143@param loc Source location information.
144@return The number of threads in the innermost active parallel construct.
145*/
Jonathan Peyton30419822017-05-12 18:01:32 +0000146kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
147 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000148
Jonathan Peyton30419822017-05-12 18:01:32 +0000149 return __kmp_entry_thread()->th.th_team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000150}
151
152/*!
153 * @ingroup DEPRECATED
154 * @param loc location description
155 *
156 * This function need not be called. It always returns TRUE.
157 */
Jonathan Peyton30419822017-05-12 18:01:32 +0000158kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000159#ifndef KMP_DEBUG
160
Jonathan Peyton30419822017-05-12 18:01:32 +0000161 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000162
163#else
164
Jonathan Peyton30419822017-05-12 18:01:32 +0000165 const char *semi2;
166 const char *semi3;
167 int line_no;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000168
Jonathan Peyton30419822017-05-12 18:01:32 +0000169 if (__kmp_par_range == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000170 return TRUE;
Jonathan Peyton30419822017-05-12 18:01:32 +0000171 }
172 semi2 = loc->psource;
173 if (semi2 == NULL) {
174 return TRUE;
175 }
176 semi2 = strchr(semi2, ';');
177 if (semi2 == NULL) {
178 return TRUE;
179 }
180 semi2 = strchr(semi2 + 1, ';');
181 if (semi2 == NULL) {
182 return TRUE;
183 }
184 if (__kmp_par_range_filename[0]) {
185 const char *name = semi2 - 1;
186 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
187 name--;
188 }
189 if ((*name == '/') || (*name == ';')) {
190 name++;
191 }
192 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
193 return __kmp_par_range < 0;
194 }
195 }
196 semi3 = strchr(semi2 + 1, ';');
197 if (__kmp_par_range_routine[0]) {
198 if ((semi3 != NULL) && (semi3 > semi2) &&
199 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
200 return __kmp_par_range < 0;
201 }
202 }
203 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
204 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
205 return __kmp_par_range > 0;
206 }
207 return __kmp_par_range < 0;
208 }
209 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000210
211#endif /* KMP_DEBUG */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000212}
213
214/*!
215@ingroup THREAD_STATES
216@param loc Source location information.
Jonathan Peyton30419822017-05-12 18:01:32 +0000217@return 1 if this thread is executing inside an active parallel region, zero if
218not.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000219*/
Jonathan Peyton30419822017-05-12 18:01:32 +0000220kmp_int32 __kmpc_in_parallel(ident_t *loc) {
221 return __kmp_entry_thread()->th.th_root->r.r_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000222}
223
224/*!
225@ingroup PARALLEL
226@param loc source location information
227@param global_tid global thread number
228@param num_threads number of threads requested for this parallel construct
229
230Set the number of threads to be used by the next fork spawned by this thread.
231This call is only required if the parallel construct has a `num_threads` clause.
232*/
// Record the thread count requested by a `num_threads` clause for the next
// fork spawned by this thread. Thin wrapper over the runtime-internal push.
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}
240
// Intentional no-op: the pushed num_threads value is consumed (popped)
// automatically by the fork, so there is nothing to undo here. The entry
// point exists for symmetry with __kmpc_push_num_threads.
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}
246
Jim Cownie5e8470a2013-09-27 10:38:44 +0000247#if OMP_40_ENABLED
248
// Record the proc_bind policy requested by a `proc_bind` clause for the
// next fork spawned by this thread. proc_bind arrives as a plain int from
// compiler-generated code and is cast to the runtime's enum type.
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
256
257#endif /* OMP_40_ENABLED */
258
Jim Cownie5e8470a2013-09-27 10:38:44 +0000259/*!
260@ingroup PARALLEL
261@param loc source location information
262@param argc total number of arguments in the ellipsis
Jonathan Peyton30419822017-05-12 18:01:32 +0000263@param microtask pointer to callback routine consisting of outlined parallel
264construct
Jim Cownie5e8470a2013-09-27 10:38:44 +0000265@param ... pointers to shared variables that aren't global
266
267Do the actual fork and call the microtask in the relevant number of threads.
268*/
// Compiler entry point for `#pragma omp parallel`: fork the team, invoke
// the outlined microtask in each thread, then join. The variadic arguments
// are the shared-variable pointers captured by the compiler.
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    // Publish this frame's address and return address so OMPT tool callbacks
    // can attribute the parallel region to the correct user-code frame.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        // Serialized ancestor: the frame lives in the lightweight task team.
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
// On these ABIs va_list is an array type that must be passed by address.
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  // Mirror the timer transition done on entry: return to the serial timer
  // if we started in a serial region, otherwise pop the pushed timer.
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
346
347#if OMP_40_ENABLED
348/*!
349@ingroup PARALLEL
350@param loc source location information
351@param global_tid global thread number
352@param num_teams number of teams requested for the teams construct
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000353@param num_threads number of threads per team requested for the teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +0000354
355Set the number of teams to be used by the teams construct.
356This call is only required if the teams construct has a `num_teams` clause
357or a `thread_limit` clause (or both).
358*/
// Record the team count and per-team thread count requested by the
// `num_teams` / `thread_limit` clauses of a teams construct, for use by the
// next __kmpc_fork_teams from this thread.
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
367
368/*!
369@ingroup PARALLEL
370@param loc source location information
371@param argc total number of arguments in the ellipsis
Jonathan Peyton30419822017-05-12 18:01:32 +0000372@param microtask pointer to callback routine consisting of outlined teams
373construct
Jim Cownie5e8470a2013-09-27 10:38:44 +0000374@param ... pointers to shared variables that aren't global
375
376Do the actual fork and call the microtask in the relevant number of threads.
377*/
// Compiler entry point for `#pragma omp teams`: fork the league of teams,
// run the outlined construct via __kmp_teams_master in each team master,
// then join and restore this thread's pre-teams state.
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  // Publish frame/return address for OMPT tool attribution of this region.
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
// On these ABIs va_list is an array type that must be passed by address.
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  __kmp_free(tmp);
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  // Clear teams bookkeeping set on entry; th_teams_size is reset as one
  // 64-bit store (it packs both nteams and nth).
  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
446#endif /* OMP_40_ENABLED */
447
Jim Cownie5e8470a2013-09-27 10:38:44 +0000448// I don't think this function should ever have been exported.
449// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
450// openmp code ever called it, but it's been exported from the RTL for so
451// long that I'm afraid to remove the definition.
// Legacy exported wrapper (see the note above): forwards to the internal
// task-invocation routine. Kept only for binary compatibility.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000453
454/*!
455@ingroup PARALLEL
456@param loc source location information
457@param global_tid global thread number
458
459Enter a serialized parallel construct. This interface is used to handle a
460conditional parallel region, like this,
461@code
462#pragma omp parallel if (condition)
463@endcode
464when the condition is false.
465*/
// Enter a serialized parallel region (e.g. `parallel if(false)`).
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  // Record the caller's return address before delegating so OMPT callbacks
  // fired inside __kmp_serialized_parallel attribute the region correctly.
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}
475
476/*!
477@ingroup PARALLEL
478@param loc source location information
479@param global_tid global thread number
480
481Leave a serialized parallel construct.
482*/
// Leave a serialized parallel region: fire OMPT end callbacks, pop the
// internal-control and dispatch-buffer stacks, and — when the outermost
// serialized level is exited — restore the thread's cached team state.
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  // Fire implicit-task-end and parallel-end callbacks before unlinking the
  // lightweight task team, so tools see a consistent task stack.
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    // Restore FP control state saved when the serialized region was entered.
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    // Still nested inside another serialized level: only the counter above
    // changes; nothing to restore yet.
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}
629
630/*!
631@ingroup SYNCHRONIZATION
632@param loc source location information.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000633
Andrey Churbanov723a6b62015-02-20 18:09:27 +0000634Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
Jim Cownie5e8470a2013-09-27 10:38:44 +0000635depending on the memory ordering convention obeyed by the compiler
636even that may not be necessary).
637*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is addressed either (mfence needed).
  // Probably the non-temporal load monvtdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  // Lazily populate the cached CPUID info the first time a flush is issued.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Notify a registered OMPT tool that a flush event occurred on this thread.
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
699
700/* -------------------------------------------------------------------------- */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000701/*!
702@ingroup SYNCHRONIZATION
703@param loc source location information
704@param global_tid thread id.
705
706Execute a barrier.
707*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  // Make sure the parallel subsystem is up before touching team structures.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  // Publish the enter frame / return address so an OMPT tool can unwind the
  // user stack across the runtime barrier call; cleared again after the
  // barrier below.
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
751
752/* The BARRIER for a MASTER section is always explicit */
753/*!
754@ingroup WORK_SHARING
755@param loc source location information.
756@param global_tid global thread number .
757@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
758*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  // 1 iff the calling thread is the master of its team and should execute
  // the master region; 0 for all other threads.
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Report scope_begin only on the thread that actually enters the region.
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
    // The dynamic-lock build adds a trailing lock-sequence argument.
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}
808
809/*!
810@ingroup WORK_SHARING
811@param loc source location information.
812@param global_tid global thread number .
813
Jonathan Peyton30419822017-05-12 18:01:32 +0000814Mark the end of a <tt>master</tt> region. This should only be called by the
815thread that executes the <tt>master</tt> region.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000816*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  // Only the master thread may call this (pairs with __kmpc_master == 1).
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    // Pop the sync entry pushed by __kmpc_master on the master thread only.
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
843
844/*!
845@ingroup WORK_SHARING
846@param loc source location information.
847@param gtid global thread number.
848
849Start execution of an <tt>ordered</tt> construct.
850*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // For tools, an ordered region is modeled as a mutex keyed on the team's
  // ordered dispatch value: report acquire before waiting, acquired after.
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  // Enter the ordered region via the dispatcher's enter-ordered hook if one
  // is installed, otherwise via the default implementation.
  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}
917
918/*!
919@ingroup WORK_SHARING
920@param loc source location information.
921@param gtid global thread number.
922
923End execution of an <tt>ordered</tt> construct.
924*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  // Exit the ordered region through the dispatcher's exit-ordered hook if one
  // is installed, otherwise through the default implementation.
  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Mirror of __kmpc_ordered: report the modeled mutex as released, using the
  // same wait id (address of the team's ordered dispatch value).
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
953
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000954#if KMP_USE_DYNAMIC_LOCK
955
Jonathan Peytondae13d82015-12-11 21:57:06 +0000956static __forceinline void
Jonathan Peyton30419822017-05-12 18:01:32 +0000957__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
958 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
959 // Pointer to the allocated indirect lock is written to crit, while indexing
960 // is ignored.
961 void *idx;
962 kmp_indirect_lock_t **lck;
963 lck = (kmp_indirect_lock_t **)crit;
964 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
965 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
966 KMP_SET_I_LOCK_LOCATION(ilk, loc);
967 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
968 KA_TRACE(20,
969 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000970#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +0000971 __kmp_itt_critical_creating(ilk->lock, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000972#endif
Andrey Churbanov5ba90c72017-07-17 09:03:14 +0000973 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
Jonathan Peyton30419822017-05-12 18:01:32 +0000974 if (status == 0) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000975#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +0000976 __kmp_itt_critical_destroyed(ilk->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000977#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000978 // We don't really need to destroy the unclaimed lock here since it will be
979 // cleaned up at program exit.
980 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
981 }
982 KMP_DEBUG_ASSERT(*lck != NULL);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000983}
984
// Fast-path acquire tas lock: try an atomic CAS from "free" to "busy(gtid)";
// on failure, yield/spin with exponential backoff until the CAS succeeds.
// (No comments inside the macro bodies: '//' would swallow the continuation
// backslashes.)
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (                                                                  \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock: single non-blocking CAS attempt; rc is set to
// TRUE/FALSE depending on whether the lock was obtained.
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock: a release-ordered store of the "free" value.
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001031
#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock: CAS from "free" to "busy"; on contention,
// mark the lock as contended (low bit) and sleep in the kernel via
// futex(FUTEX_WAIT) until woken, then retry.
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock: single non-blocking CAS attempt; sets rc.
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock: exchange in the "free" value; if the old
// value had the contended bit set, wake one kernel-side waiter.
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001104
1105#else // KMP_USE_DYNAMIC_LOCK
1106
// Return the user lock backing a critical section name, lazily allocating it
// on first use. Uses a double-checked pattern: a plain load first, then a CAS
// to publish a newly allocated lock; the CAS loser frees its lock and adopts
// the winner's.
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// are chances the lock will destroyed with no usage, but it is not a
// problem, because this is not real event seen by user but rather setting
// name for object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be reused
// for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}
1152
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001153#endif // KMP_USE_DYNAMIC_LOCK
1154
Jim Cownie5e8470a2013-09-27 10:38:44 +00001155/*!
1156@ingroup WORK_SHARING
1157@param loc source location information.
1158@param global_tid global thread number .
Jonathan Peyton30419822017-05-12 18:01:32 +00001159@param crit identity of the critical section. This could be a pointer to a lock
1160associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001161
1162Enter code protected by a `critical` construct.
1163This function blocks until the executing thread can enter the critical section.
1164*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001165void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1166 kmp_critical_name *crit) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001167#if KMP_USE_DYNAMIC_LOCK
Joachim Protze82e94a52017-11-01 10:08:30 +00001168#if OMPT_SUPPORT && OMPT_OPTIONAL
1169 OMPT_STORE_RETURN_ADDRESS(global_tid);
1170#endif // OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001171 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001172#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001173 KMP_COUNT_BLOCK(OMP_CRITICAL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001174#if OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001175 ompt_state_t prev_state = ompt_state_undefined;
Joachim Protze82e94a52017-11-01 10:08:30 +00001176 ompt_thread_info_t ti;
1177#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001178 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001179
Jonathan Peyton30419822017-05-12 18:01:32 +00001180 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001181
Jonathan Peyton30419822017-05-12 18:01:32 +00001182 // TODO: add THR_OVHD_STATE
Jim Cownie5e8470a2013-09-27 10:38:44 +00001183
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001184 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
Jonathan Peyton30419822017-05-12 18:01:32 +00001185 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001186
Jonathan Peyton30419822017-05-12 18:01:32 +00001187 if ((__kmp_user_lock_kind == lk_tas) &&
1188 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1189 lck = (kmp_user_lock_p)crit;
1190 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001191#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001192 else if ((__kmp_user_lock_kind == lk_futex) &&
1193 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1194 lck = (kmp_user_lock_p)crit;
1195 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001196#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001197 else { // ticket, queuing or drdpa
1198 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1199 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001200
Jonathan Peyton30419822017-05-12 18:01:32 +00001201 if (__kmp_env_consistency_check)
1202 __kmp_push_sync(global_tid, ct_critical, loc, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001203
Jonathan Peyton30419822017-05-12 18:01:32 +00001204// since the critical directive binds to all threads, not just the current
1205// team we have to check this even if we are in a serialized team.
1206// also, even if we are the uber thread, we still have to conduct the lock,
1207// as we have to contend with sibling threads.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001208
1209#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001210 __kmp_itt_critical_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001211#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001212#if OMPT_SUPPORT && OMPT_OPTIONAL
1213 OMPT_STORE_RETURN_ADDRESS(gtid);
1214 void *codeptr_ra = NULL;
1215 if (ompt_enabled.enabled) {
1216 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1217 /* OMPT state update */
1218 prev_state = ti.state;
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001219 ti.wait_id = (ompt_wait_id_t)lck;
1220 ti.state = ompt_state_wait_critical;
Joachim Protze82e94a52017-11-01 10:08:30 +00001221
1222 /* OMPT event callback */
1223 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1224 if (ompt_enabled.ompt_callback_mutex_acquire) {
1225 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1226 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001227 (ompt_wait_id_t)crit, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001228 }
1229 }
1230#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001231 // Value of 'crit' should be good for using as a critical_id of the critical
1232 // section directive.
1233 __kmp_acquire_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001234
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001235#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001236 __kmp_itt_critical_acquired(lck);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001237#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001238#if OMPT_SUPPORT && OMPT_OPTIONAL
1239 if (ompt_enabled.enabled) {
1240 /* OMPT state update */
1241 ti.state = prev_state;
1242 ti.wait_id = 0;
1243
1244 /* OMPT event callback */
1245 if (ompt_enabled.ompt_callback_mutex_acquired) {
1246 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001247 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001248 }
1249 }
1250#endif
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001251 KMP_POP_PARTITIONED_TIMER();
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001252
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001253 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
Jonathan Peyton30419822017-05-12 18:01:32 +00001254 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001255#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001256}
1257
1258#if KMP_USE_DYNAMIC_LOCK
1259
// Converts the given hint to an internal lock implementation
// (kmp_dyna_lockseq_t). Hints are bit flags; the checks below are ordered so
// that KMP-specific hints win over the generic OpenMP ones, conflicting
// OpenMP hints cancel out, and anything unrecognized falls back to the
// default lock sequence (__kmp_user_lock_seq).
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
// Without TSX compiled in, every TSX-specific sequence degrades to default.
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// On x86, RTM support is detected at runtime via cpuinfo.
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
1303
Joachim Protze82e94a52017-11-01 10:08:30 +00001304#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001305#if KMP_USE_DYNAMIC_LOCK
// Classifies the mutex implementation (spin / queuing / speculative) behind a
// dynamically-bound lock, for reporting through OMPT mutex callbacks.
//   user_lock - a tagged direct-lock word; if its tag is 0 (not a direct
//               lock), the corresponding indirect lock is looked up from it.
//   ilock     - an already-resolved indirect lock, used when user_lock == 0.
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      // Not a direct lock: fall through to the indirect-lock lookup below.
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      // Unknown direct tag — cannot classify.
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  // At this point an indirect lock must have been supplied or resolved.
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001350#else
// For locks without dynamic binding: the implementation category is fixed
// process-wide by __kmp_user_lock_kind, so no lock object is needed.
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001373#endif // KMP_USE_DYNAMIC_LOCK
1374#endif // OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze82e94a52017-11-01 10:08:30 +00001375
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001376/*!
1377@ingroup WORK_SHARING
1378@param loc source location information.
1379@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001380@param crit identity of the critical section. This could be a pointer to a lock
1381associated with the critical section, or some other suitably unique value.
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001382@param hint the lock hint.
1383
Jonathan Peyton30419822017-05-12 18:01:32 +00001384Enter code protected by a `critical` construct with a hint. The hint value is
1385used to suggest a lock implementation. This function blocks until the executing
1386thread can enter the critical section unless the hint suggests use of
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001387speculative execution and the hardware supports it.
1388*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  // NOTE(review): 'ti' is a local *copy* of the thread's ompt_thread_info;
  // the state/wait_id updates below are never stored back into
  // th.ompt_thread_info — confirm whether that is intended.
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // 'crit' doubles as the lock word for this critical section.
  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      // Direct lock: a single CAS publishes the tag into the lock word.
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    // Direct lock: the lock word itself is the lock object.
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
// Fast paths: inline TAS/futex acquire when consistency checks are off.
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    // Indirect lock: the lock word holds a pointer to the indirect lock.
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
    }
  }
#endif

  // Time spent inside the critical region is accounted to OMP_critical.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint
1506
1507#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001508
1509/*!
1510@ingroup WORK_SHARING
1511@param loc source location information.
@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001513@param crit identity of the critical section. This could be a pointer to a lock
1514associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001515
1516Leave a critical section, releasing any lock that was held during its execution.
1517*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  // Dynamic locks: the lock word in 'crit' is either a tagged direct lock or
  // a pointer to an indirect lock, mirroring the acquire path.
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
// Fast paths: inline TAS/futex release when consistency checks are off.
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // Static lock kinds: small TAS/futex locks live inline in 'crit'; larger
  // kinds store a pointer to the lock there. Note: the sizeof() uses of 'lck'
  // below are unevaluated, so reading them before 'lck' is set is fine.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  // Pops the OMP_critical timer pushed when the region was entered.
  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
1603
1604/*!
1605@ingroup SYNCHRONIZATION
1606@param loc source location information
1607@param global_tid thread id.
1608@return one if the thread should execute the master block, zero otherwise
1609
Jonathan Peyton30419822017-05-12 18:01:32 +00001610Start execution of a combined barrier and master. The barrier is executed inside
1611this function.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001612*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    // Record the runtime-entry frame so the tool can attribute the barrier.
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  // Gather barrier; the master thread gets a nonzero status back.
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  // Return 1 only for the thread that should execute the master block.
  return (status != 0) ? 0 : 1;
}
1649
1650/*!
1651@ingroup SYNCHRONIZATION
1652@param loc source location information
1653@param global_tid thread id.
1654
1655Complete the execution of a combined barrier and master. This function should
1656only be called at the completion of the <tt>master</tt> code. Other threads will
1657still be waiting at the barrier and this call releases them.
1658*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  // Release the threads still waiting at the plain barrier. Per the contract
  // above, only the thread that executed the master block calls this.
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
1664
1665/*!
1666@ingroup SYNCHRONIZATION
1667@param loc source location information
1668@param global_tid thread id.
1669@return one if the thread should execute the master block, zero otherwise
1670
1671Start execution of a combined barrier and master(nowait) construct.
1672The barrier is executed inside this function.
There is no equivalent "end" function: the construct needs none, since the
runtime pops the master synchronization state itself (no __kmpc_end_master
is ever called for this construct).
1674*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      // Compiler passed a null source location; warn since the consistency
      // checker cannot report a meaningful position to the user.
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    // Record the runtime-entry frame so the tool can attribute the barrier.
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  // Non-gathering barrier (no status needed), then pick the master thread.
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */

    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */

      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}
1732
1733/* The BARRIER for a SINGLE process section is always explicit */
1734/*!
1735@ingroup WORK_SHARING
1736@param loc source location information
1737@param global_tid global thread number
1738@return One if this thread should execute the single construct, zero otherwise.
1739
1740Test whether to execute a <tt>single</tt> construct.
Jonathan Peyton30419822017-05-12 18:01:32 +00001741There are no implicit barriers in the two "single" calls, rather the compiler
1742should introduce an explicit barrier if it is required.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001743*/
1744
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  // __kmp_enter_single returns TRUE for exactly one thread of the team.
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      // Executor: scope_begin here; scope_end is raised in __kmpc_end_single.
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      // Non-executors skip the block entirely, so both scope_begin and
      // scope_end for 'single_other' are raised back-to-back here.
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}
1787
1788/*!
1789@ingroup WORK_SHARING
1790@param loc source location information
1791@param global_tid global thread number
1792
1793Mark the end of a <tt>single</tt> construct. This function should
1794only be called by the thread that executed the block of code protected
1795by the `single` construct.
1796*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  // Pops the OMP_single timer pushed by __kmpc_single for the executor.
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  // Matching scope_end for the scope_begin raised in __kmpc_single's
  // executor branch.
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
1815
1816/*!
1817@ingroup WORK_SHARING
1818@param loc Source location
1819@param global_tid Global thread id
1820
1821Mark the end of a statically scheduled loop.
1822*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use default set above.
        // a warning about this case is provided in __kmpc_for_static_init
      }
      // Sanity check; relies on ompt_work_* enumerators being nonzero.
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}
1854
Jonathan Peyton30419822017-05-12 18:01:32 +00001855// User routines which take C-style arguments (call by value)
1856// different from the Fortran equivalent routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00001857
Jonathan Peyton30419822017-05-12 18:01:32 +00001858void ompc_set_num_threads(int arg) {
1859 // !!!!! TODO: check the per-task binding
1860 __kmp_set_num_threads(arg, __kmp_entry_gtid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00001861}
1862
Jonathan Peyton30419822017-05-12 18:01:32 +00001863void ompc_set_dynamic(int flag) {
1864 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001865
Jonathan Peyton30419822017-05-12 18:01:32 +00001866 /* For the thread-private implementation of the internal controls */
1867 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001868
Jonathan Peyton30419822017-05-12 18:01:32 +00001869 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001870
Jonathan Peyton30419822017-05-12 18:01:32 +00001871 set__dynamic(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001872}
1873
Jonathan Peyton30419822017-05-12 18:01:32 +00001874void ompc_set_nested(int flag) {
1875 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001876
Jonathan Peyton30419822017-05-12 18:01:32 +00001877 /* For the thread-private internal controls implementation */
1878 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001879
Jonathan Peyton30419822017-05-12 18:01:32 +00001880 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001881
Jonathan Peyton30419822017-05-12 18:01:32 +00001882 set__nested(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001883}
1884
Jonathan Peyton30419822017-05-12 18:01:32 +00001885void ompc_set_max_active_levels(int max_active_levels) {
1886 /* TO DO */
1887 /* we want per-task implementation of this internal control */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001888
Jonathan Peyton30419822017-05-12 18:01:32 +00001889 /* For the per-thread internal controls implementation */
1890 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001891}
1892
Jonathan Peyton30419822017-05-12 18:01:32 +00001893void ompc_set_schedule(omp_sched_t kind, int modifier) {
1894 // !!!!! TODO: check the per-task binding
1895 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001896}
1897
Jonathan Peyton30419822017-05-12 18:01:32 +00001898int ompc_get_ancestor_thread_num(int level) {
1899 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900}
1901
Jonathan Peyton30419822017-05-12 18:01:32 +00001902int ompc_get_team_size(int level) {
1903 return __kmp_get_team_size(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001904}
1905
Jonathan Peyton6d88e042018-12-13 23:14:24 +00001906#if OMP_50_ENABLED
1907/* OpenMP 5.0 Affinity Format API */
1908
1909void ompc_set_affinity_format(char const *format) {
1910 if (!__kmp_init_serial) {
1911 __kmp_serial_initialize();
1912 }
1913 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1914 format, KMP_STRLEN(format) + 1);
1915}
1916
1917size_t ompc_get_affinity_format(char *buffer, size_t size) {
1918 size_t format_size;
1919 if (!__kmp_init_serial) {
1920 __kmp_serial_initialize();
1921 }
1922 format_size = KMP_STRLEN(__kmp_affinity_format);
1923 if (buffer && size) {
1924 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1925 format_size + 1);
1926 }
1927 return format_size;
1928}
1929
1930void ompc_display_affinity(char const *format) {
1931 int gtid;
1932 if (!TCR_4(__kmp_init_middle)) {
1933 __kmp_middle_initialize();
1934 }
1935 gtid = __kmp_get_gtid();
1936 __kmp_aux_display_affinity(gtid, format);
1937}
1938
1939size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1940 char const *format) {
1941 int gtid;
1942 size_t num_required;
1943 kmp_str_buf_t capture_buf;
1944 if (!TCR_4(__kmp_init_middle)) {
1945 __kmp_middle_initialize();
1946 }
1947 gtid = __kmp_get_gtid();
1948 __kmp_str_buf_init(&capture_buf);
1949 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1950 if (buffer && buf_size) {
1951 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1952 capture_buf.used + 1);
1953 }
1954 __kmp_str_buf_free(&capture_buf);
1955 return num_required;
1956}
1957#endif /* OMP_50_ENABLED */
1958
Jonathan Peyton30419822017-05-12 18:01:32 +00001959void kmpc_set_stacksize(int arg) {
1960 // __kmp_aux_set_stacksize initializes the library if needed
1961 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962}
1963
Jonathan Peyton30419822017-05-12 18:01:32 +00001964void kmpc_set_stacksize_s(size_t arg) {
1965 // __kmp_aux_set_stacksize initializes the library if needed
1966 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967}
1968
Jonathan Peyton30419822017-05-12 18:01:32 +00001969void kmpc_set_blocktime(int arg) {
1970 int gtid, tid;
1971 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001972
Jonathan Peyton30419822017-05-12 18:01:32 +00001973 gtid = __kmp_entry_gtid();
1974 tid = __kmp_tid_from_gtid(gtid);
1975 thread = __kmp_thread_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001976
Jonathan Peyton30419822017-05-12 18:01:32 +00001977 __kmp_aux_set_blocktime(arg, thread, tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001978}
1979
Jonathan Peyton30419822017-05-12 18:01:32 +00001980void kmpc_set_library(int arg) {
1981 // __kmp_user_set_library initializes the library if needed
1982 __kmp_user_set_library((enum library_type)arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001983}
1984
Jonathan Peyton30419822017-05-12 18:01:32 +00001985void kmpc_set_defaults(char const *str) {
1986 // __kmp_aux_set_defaults initializes the library if needed
1987 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988}
1989
Jonathan Peyton30419822017-05-12 18:01:32 +00001990void kmpc_set_disp_num_buffers(int arg) {
1991 // ignore after initialization because some teams have already
1992 // allocated dispatch buffers
1993 if (__kmp_init_serial == 0 && arg > 0)
1994 __kmp_dispatch_num_buffers = arg;
Jonathan Peyton067325f2016-05-31 19:01:15 +00001995}
1996
// Add a processor to an affinity mask. Returns -1 when affinity is not
// supported (stub library or unsupported platform).
int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}
2007
// Remove a processor from an affinity mask. Returns -1 when affinity is
// not supported (stub library or unsupported platform).
int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}
2018
// Query whether a processor is in an affinity mask. Returns -1 when
// affinity is not supported (stub library or unsupported platform).
int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
2029
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030/* -------------------------------------------------------------------------- */
2031/*!
2032@ingroup THREADPRIVATE
2033@param loc source location information
2034@param gtid global thread number
2035@param cpy_size size of the cpy_data buffer
2036@param cpy_data pointer to data to be copied
2037@param cpy_func helper function to call for copying data
2038@param didit flag variable: 1=single thread; 0=not single thread
2039
Jonathan Peyton30419822017-05-12 18:01:32 +00002040__kmpc_copyprivate implements the interface for the private data broadcast
2041needed for the copyprivate clause associated with a single region in an
2042OpenMP<sup>*</sup> program (both C and Fortran).
Jim Cownie5e8470a2013-09-27 10:38:44 +00002043All threads participating in the parallel region call this routine.
Jonathan Peyton30419822017-05-12 18:01:32 +00002044One of the threads (called the single thread) should have the <tt>didit</tt>
2045variable set to 1 and all other threads should have that variable set to 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046All threads pass a pointer to a data buffer (cpy_data) that they have built.
2047
Jonathan Peyton30419822017-05-12 18:01:32 +00002048The OpenMP specification forbids the use of nowait on the single region when a
2049copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2050barrier internally to avoid race conditions, so the code generation for the
2051single region should avoid generating a barrier after the call to @ref
2052__kmpc_copyprivate.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002053
2054The <tt>gtid</tt> parameter is the global thread id for the current thread.
2055The <tt>loc</tt> parameter is a pointer to source location information.
2056
Jonathan Peyton30419822017-05-12 18:01:32 +00002057Internal implementation: The single thread will first copy its descriptor
2058address (cpy_data) to a team-private location, then the other threads will each
2059call the function pointed to by the parameter cpy_func, which carries out the
2060copy by copying the data using the cpy_data buffer.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002061
Jonathan Peyton30419822017-05-12 18:01:32 +00002062The cpy_func routine used for the copy and the contents of the data area defined
2063by cpy_data and cpy_size may be built in any fashion that will allow the copy
2064to be done. For instance, the cpy_data buffer can hold the actual data to be
2065copied or it may hold a list of pointers to the data. The cpy_func routine must
2066interpret the cpy_data buffer appropriately.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002067
2068The interface to cpy_func is as follows:
2069@code
2070void cpy_func( void *destination, void *source )
2071@endcode
2072where void *destination is the cpy_data pointer for the thread being copied to
2073and void *source is the cpy_data pointer for the thread being copied from.
2074*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr; // team-shared slot used to publish the single thread's
                   // cpy_data pointer to the other threads

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  // Only the thread that executed the single region publishes its buffer.
  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* First barrier: ensures *data_ptr is visible to all threads before anyone
   copies from it. This barrier is not a barrier region boundary. */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  // Every non-single thread copies from the published buffer into its own.
  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

// Second barrier: keeps the single thread's buffer alive until all copies
// are complete. Consider it a user-visible barrier for barrier region
// boundaries. Nesting checks are already handled by the single construct
// checks.

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
// tasks can overwrite the location)
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // ompt_frame was set above under the same ompt_enabled.enabled condition.
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
2134
2135/* -------------------------------------------------------------------------- */
2136
Jonathan Peyton30419822017-05-12 18:01:32 +00002137#define INIT_LOCK __kmp_init_user_lock_with_checks
2138#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2139#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2140#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2141#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2142#define ACQUIRE_NESTED_LOCK_TIMED \
2143 __kmp_acquire_nested_user_lock_with_checks_timed
2144#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2145#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2146#define TEST_LOCK __kmp_test_user_lock_with_checks
2147#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2148#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2149#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002150
Jonathan Peyton30419822017-05-12 18:01:32 +00002151// TODO: Make check abort messages use location info & pass it into
2152// with_checks routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00002153
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002154#if KMP_USE_DYNAMIC_LOCK
2155
// Internal lock initializer for the dynamic-lock layer: initializes the
// user lock word as either a direct (D) lock stored in place, or an
// indirect (I) lock allocated behind a handle, depending on the sequence.
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    // Direct lock: the lock state lives inside the user's lock word.
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    // Indirect lock: the user word holds a tag; look up the real object.
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}
2172
// Internal nest lock initializer for the dynamic-lock layer: maps a flat
// lock sequence to its nested counterpart and initializes an indirect lock
// (nested locks are always indirect).
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks;
  // fall back to the default user lock sequence.
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    // Any other sequence falls back to nested queuing.
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}
2209
/* Initialize a simple lock with a hint (omp_init_lock_with_hint entry).
   The hint selects the lock implementation via __kmp_map_hint_to_lock. */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
}
2233
/* Initialize a nested lock with a hint (omp_init_nest_lock_with_hint
   entry). The hint selects the lock implementation, which is then mapped
   to its nested variant. */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
}
2257
2258#endif // KMP_USE_DYNAMIC_LOCK
2259
/* Initialize a simple lock (omp_init_lock entry). With dynamic locks the
   default user lock sequence is used; otherwise a static user lock is set
   up, stored in place when small enough or allocated otherwise. */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  // If the lock object fits in the user's omp_lock_t, store it in place;
  // otherwise allocate one and keep a pointer in the user lock word.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
2331
/* Initialize a nested lock (omp_init_nest_lock entry). Mirrors
   __kmpc_init_lock but sizes against omp_nest_lock_t, which additionally
   holds the nesting depth. */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  // If poll word + nesting depth fit in the user's omp_nest_lock_t, store
  // the lock in place; otherwise allocate one behind the user lock word.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock
2406
/* Destroy a simple lock (omp_destroy_lock entry). With dynamic locks the
   destroy function is dispatched through the lock word's tag; otherwise
   the static lock is destroyed and, if it was heap-allocated, freed. */
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  // Tag 0 means an indirect lock: resolve the handle to the real object.
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  // Same in-place vs. allocated decision as in __kmpc_init_lock.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  // In-place locks need no freeing; only allocated ones are released.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock
2484
/* Destroy a nested lock (omp_destroy_nest_lock entry). Nested dynamic
   locks are always indirect, so the handle is resolved unconditionally. */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  // Same in-place vs. allocated decision as in __kmpc_init_nest_lock.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  // In-place locks need no freeing; only allocated ones are released.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock
2559
// __kmpc_set_lock: acquire a simple (non-nestable) user lock; implements
// omp_set_lock. Blocks until the lock is obtained. ITT and OMPT tool
// notifications bracket the actual acquisition in both build flavors.
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  // Dynamic dispatch: the lock kind is encoded as a tag inside *user_lock.
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
  // Inlined fast paths for TAS/futex locks when consistency checking is off;
  // otherwise dispatch through the direct-lock function table.
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  // Resolve the lock object: small TAS/futex locks are stored directly
  // inside the user's omp_lock_t; otherwise look it up in the lock table.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002651
// __kmpc_set_nest_lock: acquire a nestable user lock; implements
// omp_set_nest_lock. The OMPT protocol distinguishes a first acquisition
// (mutex_acquired) from a re-acquisition by the current owner
// (nest_lock with ompt_scope_begin).
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
          codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void) acquire_status; // only consumed when OMPT is compiled in
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  // Resolve the lock object: stored inline in the omp_nest_lock_t when the
  // poll word plus nesting counter fit, otherwise found via the lock table.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
2760
Jonathan Peyton30419822017-05-12 18:01:32 +00002761void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002762#if KMP_USE_DYNAMIC_LOCK
2763
Jonathan Peyton30419822017-05-12 18:01:32 +00002764 int tag = KMP_EXTRACT_D_TAG(user_lock);
2765#if USE_ITT_BUILD
2766 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2767#endif
2768#if KMP_USE_INLINED_TAS
2769 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2770 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2771 } else
2772#elif KMP_USE_INLINED_FUTEX
2773 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2774 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2775 } else
2776#endif
2777 {
2778 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2779 }
2780
Joachim Protze82e94a52017-11-01 10:08:30 +00002781#if OMPT_SUPPORT && OMPT_OPTIONAL
2782 // This is the case, if called from omp_init_lock_with_hint:
2783 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2784 if (!codeptr)
2785 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2786 if (ompt_enabled.ompt_callback_mutex_released) {
2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002788 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002789 }
2790#endif
2791
Jonathan Peyton30419822017-05-12 18:01:32 +00002792#else // KMP_USE_DYNAMIC_LOCK
2793
2794 kmp_user_lock_p lck;
2795
2796 /* Can't use serial interval since not block structured */
2797 /* release the lock */
2798
2799 if ((__kmp_user_lock_kind == lk_tas) &&
2800 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2801#if KMP_OS_LINUX && \
2802 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2803// "fast" path implemented to fix customer performance issue
2804#if USE_ITT_BUILD
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002805 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002806#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00002807 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2808 KMP_MB();
Joachim Protze82e94a52017-11-01 10:08:30 +00002809
2810#if OMPT_SUPPORT && OMPT_OPTIONAL
2811 // This is the case, if called from omp_init_lock_with_hint:
2812 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2813 if (!codeptr)
2814 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2815 if (ompt_enabled.ompt_callback_mutex_released) {
2816 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002817 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002818 }
2819#endif
2820
Jonathan Peyton30419822017-05-12 18:01:32 +00002821 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002822#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002823 lck = (kmp_user_lock_p)user_lock;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002824#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002825 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002826#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002827 else if ((__kmp_user_lock_kind == lk_futex) &&
2828 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2829 lck = (kmp_user_lock_p)user_lock;
2830 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002831#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002832 else {
2833 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2834 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002835
2836#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002837 __kmp_itt_lock_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002838#endif /* USE_ITT_BUILD */
2839
Jonathan Peyton30419822017-05-12 18:01:32 +00002840 RELEASE_LOCK(lck, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002841
Joachim Protze82e94a52017-11-01 10:08:30 +00002842#if OMPT_SUPPORT && OMPT_OPTIONAL
2843 // This is the case, if called from omp_init_lock_with_hint:
2844 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2845 if (!codeptr)
2846 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2847 if (ompt_enabled.ompt_callback_mutex_released) {
2848 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002849 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002850 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002851#endif
2852
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002853#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002854}
2855
2856/* release the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002857void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002858#if KMP_USE_DYNAMIC_LOCK
2859
Jonathan Peyton30419822017-05-12 18:01:32 +00002860#if USE_ITT_BUILD
2861 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2862#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00002863 int release_status =
2864 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
Gheorghe-Teodor Bercea15f54072018-08-27 19:54:26 +00002865 (void) release_status;
Joachim Protze82e94a52017-11-01 10:08:30 +00002866
2867#if OMPT_SUPPORT && OMPT_OPTIONAL
2868 // This is the case, if called from omp_init_lock_with_hint:
2869 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2870 if (!codeptr)
2871 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2872 if (ompt_enabled.enabled) {
2873 if (release_status == KMP_LOCK_RELEASED) {
2874 if (ompt_enabled.ompt_callback_mutex_released) {
2875 // release_lock_last
2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002877 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002878 }
2879 } else if (ompt_enabled.ompt_callback_nest_lock) {
2880 // release_lock_prev
2881 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002882 ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002883 }
2884 }
2885#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002886
2887#else // KMP_USE_DYNAMIC_LOCK
2888
2889 kmp_user_lock_p lck;
2890
2891 /* Can't use serial interval since not block structured */
2892
2893 if ((__kmp_user_lock_kind == lk_tas) &&
2894 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2895 OMP_NEST_LOCK_T_SIZE)) {
2896#if KMP_OS_LINUX && \
2897 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2898 // "fast" path implemented to fix customer performance issue
2899 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2900#if USE_ITT_BUILD
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002901 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002902#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00002903
2904#if OMPT_SUPPORT && OMPT_OPTIONAL
2905 int release_status = KMP_LOCK_STILL_HELD;
2906#endif
2907
Jonathan Peyton30419822017-05-12 18:01:32 +00002908 if (--(tl->lk.depth_locked) == 0) {
2909 TCW_4(tl->lk.poll, 0);
Joachim Protze82e94a52017-11-01 10:08:30 +00002910#if OMPT_SUPPORT && OMPT_OPTIONAL
2911 release_status = KMP_LOCK_RELEASED;
2912#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002913 }
2914 KMP_MB();
Joachim Protze82e94a52017-11-01 10:08:30 +00002915
2916#if OMPT_SUPPORT && OMPT_OPTIONAL
2917 // This is the case, if called from omp_init_lock_with_hint:
2918 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2919 if (!codeptr)
2920 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2921 if (ompt_enabled.enabled) {
2922 if (release_status == KMP_LOCK_RELEASED) {
2923 if (ompt_enabled.ompt_callback_mutex_released) {
2924 // release_lock_last
2925 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002926 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002927 }
2928 } else if (ompt_enabled.ompt_callback_nest_lock) {
2929 // release_lock_previous
2930 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002931 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00002932 }
2933 }
2934#endif
2935
Jonathan Peyton30419822017-05-12 18:01:32 +00002936 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002937#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002938 lck = (kmp_user_lock_p)user_lock;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002939#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002940 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002941#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002942 else if ((__kmp_user_lock_kind == lk_futex) &&
2943 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2944 OMP_NEST_LOCK_T_SIZE)) {
2945 lck = (kmp_user_lock_p)user_lock;
2946 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002947#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002948 else {
2949 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2950 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002951
2952#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002953 __kmp_itt_lock_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002954#endif /* USE_ITT_BUILD */
2955
Jonathan Peyton30419822017-05-12 18:01:32 +00002956 int release_status;
2957 release_status = RELEASE_NESTED_LOCK(lck, gtid);
Joachim Protze82e94a52017-11-01 10:08:30 +00002958#if OMPT_SUPPORT && OMPT_OPTIONAL
2959 // This is the case, if called from omp_init_lock_with_hint:
2960 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2961 if (!codeptr)
2962 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2963 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002964 if (release_status == KMP_LOCK_RELEASED) {
Joachim Protze82e94a52017-11-01 10:08:30 +00002965 if (ompt_enabled.ompt_callback_mutex_released) {
2966 // release_lock_last
2967 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002968 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002969 }
Joachim Protze82e94a52017-11-01 10:08:30 +00002970 } else if (ompt_enabled.ompt_callback_nest_lock) {
2971 // release_lock_previous
2972 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002973 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002974 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002975 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002976#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002977
2978#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002979}
2980
2981/* try to acquire the lock */
// __kmpc_test_lock: try to acquire a simple user lock without blocking;
// implements omp_test_lock. Returns FTN_TRUE if the lock was obtained,
// FTN_FALSE otherwise. The OMPT mutex_acquired callback fires only on
// success; ITT is told about a cancelled acquire on failure.
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  // Dynamic dispatch: the lock kind is encoded as a tag inside *user_lock.
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
  // Inlined fast paths for TAS/futex when consistency checking is off.
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  // Resolve the lock object: inline TAS/futex if it fits in the omp_lock_t,
  // otherwise look it up in the lock table.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
3088
3089/* try to acquire the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00003090int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003091#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00003092 int rc;
3093#if USE_ITT_BUILD
3094 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3095#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00003096#if OMPT_SUPPORT && OMPT_OPTIONAL
3097 // This is the case, if called from omp_init_lock_with_hint:
3098 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3099 if (!codeptr)
3100 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3101 if (ompt_enabled.ompt_callback_mutex_acquire) {
3102 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3103 ompt_mutex_nest_lock, omp_lock_hint_none,
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003104 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00003105 codeptr);
3106 }
3107#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003108 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3109#if USE_ITT_BUILD
3110 if (rc) {
3111 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3112 } else {
3113 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3114 }
3115#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00003116#if OMPT_SUPPORT && OMPT_OPTIONAL
3117 if (ompt_enabled.enabled && rc) {
3118 if (rc == 1) {
3119 if (ompt_enabled.ompt_callback_mutex_acquired) {
3120 // lock_first
3121 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003122 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003123 }
3124 } else {
3125 if (ompt_enabled.ompt_callback_nest_lock) {
3126 // lock_next
3127 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003128 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003129 }
3130 }
3131 }
3132#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003133 return rc;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003134
3135#else // KMP_USE_DYNAMIC_LOCK
3136
Jonathan Peyton30419822017-05-12 18:01:32 +00003137 kmp_user_lock_p lck;
3138 int rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003139
Jonathan Peyton30419822017-05-12 18:01:32 +00003140 if ((__kmp_user_lock_kind == lk_tas) &&
3141 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3142 OMP_NEST_LOCK_T_SIZE)) {
3143 lck = (kmp_user_lock_p)user_lock;
3144 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00003145#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00003146 else if ((__kmp_user_lock_kind == lk_futex) &&
3147 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3148 OMP_NEST_LOCK_T_SIZE)) {
3149 lck = (kmp_user_lock_p)user_lock;
3150 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003151#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003152 else {
3153 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3154 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003155
3156#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003157 __kmp_itt_lock_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003158#endif /* USE_ITT_BUILD */
3159
Joachim Protze82e94a52017-11-01 10:08:30 +00003160#if OMPT_SUPPORT && OMPT_OPTIONAL
3161 // This is the case, if called from omp_init_lock_with_hint:
3162 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3163 if (!codeptr)
3164 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3165 if (ompt_enabled.enabled) &&
3166 ompt_enabled.ompt_callback_mutex_acquire) {
3167 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3168 ompt_mutex_nest_lock, omp_lock_hint_none,
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003169 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003170 }
3171#endif
3172
Jonathan Peyton30419822017-05-12 18:01:32 +00003173 rc = TEST_NESTED_LOCK(lck, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003174#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003175 if (rc) {
3176 __kmp_itt_lock_acquired(lck);
3177 } else {
3178 __kmp_itt_lock_cancelled(lck);
3179 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003180#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00003181#if OMPT_SUPPORT && OMPT_OPTIONAL
3182 if (ompt_enabled.enabled && rc) {
3183 if (rc == 1) {
3184 if (ompt_enabled.ompt_callback_mutex_acquired) {
3185 // lock_first
3186 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003187 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003188 }
3189 } else {
3190 if (ompt_enabled.ompt_callback_nest_lock) {
3191 // lock_next
3192 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003193 ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00003194 }
3195 }
3196 }
3197#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003198 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003199
Jonathan Peyton30419822017-05-12 18:01:32 +00003200/* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003201
3202#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00003203}
3204
Jonathan Peyton30419822017-05-12 18:01:32 +00003205// Interface to fast scalable reduce methods routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00003206
Jonathan Peyton30419822017-05-12 18:01:32 +00003207// keep the selected method in a thread local structure for cross-function
3208// usage: will be used in __kmpc_end_reduce* functions;
3209// another solution: to re-determine the method one more time in
3210// __kmpc_end_reduce* functions (new prototype required then)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003211// AT: which solution is better?
// Record the reduction method chosen for the current construct in the calling
// thread's thread-local state; __kmpc_end_reduce* reads it back with the GET
// macro below so begin/end use the same method without re-determination.
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

// Retrieve the reduction method previously stored for thread 'gtid'.
#define __KMP_GET_REDUCTION_METHOD(gtid) \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003217
Jonathan Peyton30419822017-05-12 18:01:32 +00003218// description of the packed_reduction_method variable: look at the macros in
3219// kmp.h
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220
// used in a critical section reduce block
// Acquire the critical-section lock embedded in (or referenced through) 'crit'
// on behalf of thread 'global_tid'. Paired with
// __kmp_end_critical_section_reduce_block; 'loc' is source location info used
// for consistency checking and lazy lock initialization.
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.  A zero tag means the lock has never been
  // set up; initialize it lazily according to the configured lock sequence.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      // Direct lock: CAS the tag in place (races with other threads doing the
      // same initialization are resolved by the compare-and-store).
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      // Indirect lock: allocate and publish the lock object.
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
3284
// used in a critical section reduce block
// Release the critical-section lock taken by
// __kmp_enter_critical_section_reduce_block for thread 'global_tid'.
// The lock resolution logic must mirror the enter routine so that the same
// underlying lock object is unset.
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    // Direct lock lives in 'crit' itself.
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    // Indirect lock: 'crit' stores a pointer to the allocated lock object
    // (TCR_PTR gives a consistent read of the published pointer).
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
3326
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003327#if OMP_40_ENABLED
3328static __forceinline int
3329__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3330 int *task_state) {
3331 kmp_team_t *team;
3332
3333 // Check if we are inside the teams construct?
3334 if (th->th.th_teams_microtask) {
3335 *team_p = team = th->th.th_team;
3336 if (team->t.t_level == th->th.th_teams_level) {
3337 // This is reduction at teams construct.
3338 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3339 // Let's swap teams temporarily for the reduction.
3340 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3341 th->th.th_team = team->t.t_parent;
3342 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3343 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3344 *task_state = th->th.th_task_state;
3345 th->th.th_task_state = 0;
3346
3347 return 1;
3348 }
3349 }
3350 return 0;
3351}
3352
3353static __forceinline void
3354__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3355 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3356 th->th.th_info.ds.ds_tid = 0;
3357 th->th.th_team = team;
3358 th->th.th_team_nproc = team->t.t_nproc;
3359 th->th.th_task_team = team->t.t_task_team[task_state];
3360 th->th.th_task_state = task_state;
3361}
3362#endif
3363
Jim Cownie5e8470a2013-09-27 10:38:44 +00003364/* 2.a.i. Reduce Block without a terminating barrier */
3365/*!
3366@ingroup SYNCHRONIZATION
3367@param loc source location information
3368@param global_tid global thread number
3369@param num_vars number of items (variables) to be reduced
3370@param reduce_size size of data in bytes to be reduced
3371@param reduce_data pointer to data to be reduced
Jonathan Peyton30419822017-05-12 18:01:32 +00003372@param reduce_func callback function providing reduction operation on two
3373operands and returning result of reduction in lhs_data
Jim Cownie5e8470a2013-09-27 10:38:44 +00003374@param lck pointer to the unique lock data structure
Jonathan Peyton30419822017-05-12 18:01:32 +00003375@result 1 for the master thread, 0 for all other team threads, 2 for all team
3376threads if atomic reduction needed
Jim Cownie5e8470a2013-09-27 10:38:44 +00003377
3378The nowait version is used for a reduce clause with the nowait argument.
3379*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  // Return protocol (see Doxygen block above): 1 = caller performs the
  // reduction, 2 = caller uses atomic reduction, 0 = caller does nothing.
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause can not be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  // A reduction at the teams construct runs on the parent team; swap teams
  // here and restore before returning (see __kmp_swap_teams_for_teams_reduction).
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
  // the value should be kept in a variable
  // the variable should be either a construct-specific or thread-specific
  // property, not a team specific property
  // (a thread can reach the next reduce block on the next construct, reduce
  // method may differ on the next construct)
  // an ident_t "loc" parameter could be used as a construct-specific property
  // (what if loc == 0?)
  // (if both construct-specific and team-specific variables were shared,
  // then unness extra syncs should be needed)
  // a thread-specific variable is better regarding two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes 'determine' and 'set' lines (no need to execute by one
  // thread, to avoid unness extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop',
    // but atomic operation has not been executed yet, will be executed
    // slightly later, literally on next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if master goes slow, other threads are blocked here waiting for the
// master to come and release them)
// AT: (it's not what a customer might expect specifying NOWAIT clause)
// AT: (specifying NOWAIT won't result in improvement of performance, it'll
// be confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other design)
// might go faster and be more in line with sense of NOWAIT
// AT: TO DO: do epcc test and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's
// used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially leed to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events
    // so we set-up the necessary frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    // Gather-only barrier: __kmp_barrier returns nonzero for workers, zero
    // for the thread that performs the reduction; map that to 0/1 here.
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // ( none of other workers will get to __kmpc_end_reduce_nowait() )
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
3534
3535/*!
3536@ingroup SYNCHRONIZATION
3537@param loc source location information
3538@param global_tid global thread id.
3539@param lck pointer to the unique lock data structure
3540
3541Finish the execution of a reduce nowait.
3542*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003543void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3544 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003545
Jonathan Peyton30419822017-05-12 18:01:32 +00003546 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003547
Jonathan Peyton30419822017-05-12 18:01:32 +00003548 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003549
Jonathan Peyton30419822017-05-12 18:01:32 +00003550 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003551
Jonathan Peyton30419822017-05-12 18:01:32 +00003552 if (packed_reduction_method == critical_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003553
Jonathan Peyton30419822017-05-12 18:01:32 +00003554 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003555
Jonathan Peyton30419822017-05-12 18:01:32 +00003556 } else if (packed_reduction_method == empty_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003557
Jonathan Peyton30419822017-05-12 18:01:32 +00003558 // usage: if team size == 1, no synchronization is required ( on Intel
3559 // platforms only )
Jim Cownie5e8470a2013-09-27 10:38:44 +00003560
Jonathan Peyton30419822017-05-12 18:01:32 +00003561 } else if (packed_reduction_method == atomic_reduce_block) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003562
Jonathan Peyton30419822017-05-12 18:01:32 +00003563 // neither master nor other workers should get here
3564 // (code gen does not generate this call in case 2: atomic reduce block)
3565 // actually it's better to remove this elseif at all;
3566 // after removal this value will checked by the 'else' and will assert
Jim Cownie5e8470a2013-09-27 10:38:44 +00003567
Jonathan Peyton30419822017-05-12 18:01:32 +00003568 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3569 tree_reduce_block)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003570
Jonathan Peyton30419822017-05-12 18:01:32 +00003571 // only master gets here
Jim Cownie5e8470a2013-09-27 10:38:44 +00003572
Jonathan Peyton30419822017-05-12 18:01:32 +00003573 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003574
Jonathan Peyton30419822017-05-12 18:01:32 +00003575 // should never reach this block
3576 KMP_ASSERT(0); // "unexpected method"
3577 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003578
Jonathan Peyton30419822017-05-12 18:01:32 +00003579 if (__kmp_env_consistency_check)
3580 __kmp_pop_sync(global_tid, ct_reduce, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003581
Jonathan Peyton30419822017-05-12 18:01:32 +00003582 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3583 global_tid, packed_reduction_method));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003584
Jonathan Peyton30419822017-05-12 18:01:32 +00003585 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003586}
3587
3588/* 2.a.ii. Reduce Block with a terminating barrier */
3589
3590/*!
3591@ingroup SYNCHRONIZATION
3592@param loc source location information
3593@param global_tid global thread number
3594@param num_vars number of items (variables) to be reduced
3595@param reduce_size size of data in bytes to be reduced
3596@param reduce_data pointer to data to be reduced
Jonathan Peyton30419822017-05-12 18:01:32 +00003597@param reduce_func callback function providing reduction operation on two
3598operands and returning result of reduction in lhs_data
Jim Cownie5e8470a2013-09-27 10:38:44 +00003599@param lck pointer to the unique lock data structure
Jonathan Peyton30419822017-05-12 18:01:32 +00003600@result 1 for the master thread, 0 for all other team threads, 2 for all team
3601threads if atomic reduction needed
Jim Cownie5e8470a2013-09-27 10:38:44 +00003602
3603A blocking reduce that includes an implicit barrier.
3604*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  // Return protocol (see Doxygen block above): 1 = caller performs the
  // reduction, 2 = caller uses atomic reduction, 0 = caller does nothing.
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause can not be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  // A reduction at the teams construct runs on the parent team; swap teams
  // here and restore before returning.
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // Method selection is kept per-thread so __kmpc_end_reduce() can reuse it
  // (see the discussion in __kmpc_reduce_nowait()).
  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    // Unlike the nowait variant, this is a full (gather + release) barrier:
    // note the TRUE argument. Returns nonzero for workers, zero for the
    // reducing thread; map that to 0/1 here.
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // ( none of other workers except master will enter __kmpc_end_reduce() )
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
3720
3721/*!
3722@ingroup SYNCHRONIZATION
3723@param loc source location information
3724@param global_tid global thread id.
3725@param lck pointer to the unique lock data structure
3726
3727Finish the execution of a blocking reduce.
Jonathan Peyton30419822017-05-12 18:01:32 +00003728The <tt>lck</tt> pointer must be the same as that used in the corresponding
3729start function.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003730*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  // State used to temporarily swap team info when reducing inside a teams
  // construct; restored at the end of this function if the swap happened.
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // Must retrieve the same method that the matching __kmpc_reduce call chose.
  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      // Publish the enter frame so OMPT tools can unwind across the barrier.
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    // Record source location so ITT can attribute the barrier to this construct.
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      // Barrier complete; clear the frame published above.
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size==1, no synchronization is required (Intel platforms only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    // Undo the teams-construct swap performed at function entry.
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
3849
3850#undef __KMP_GET_REDUCTION_METHOD
3851#undef __KMP_SET_REDUCTION_METHOD
3852
Jonathan Peyton30419822017-05-12 18:01:32 +00003853/* end of interface to fast scalable reduce routines */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003854
Jonathan Peyton30419822017-05-12 18:01:32 +00003855kmp_uint64 __kmpc_get_taskid() {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003856
Jonathan Peyton30419822017-05-12 18:01:32 +00003857 kmp_int32 gtid;
3858 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003859
Jonathan Peyton30419822017-05-12 18:01:32 +00003860 gtid = __kmp_get_gtid();
3861 if (gtid < 0) {
3862 return 0;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003863 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003864 thread = __kmp_thread_from_gtid(gtid);
3865 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003866
3867} // __kmpc_get_taskid
3868
Jonathan Peyton30419822017-05-12 18:01:32 +00003869kmp_uint64 __kmpc_get_parent_taskid() {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003870
Jonathan Peyton30419822017-05-12 18:01:32 +00003871 kmp_int32 gtid;
3872 kmp_info_t *thread;
3873 kmp_taskdata_t *parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003874
Jonathan Peyton30419822017-05-12 18:01:32 +00003875 gtid = __kmp_get_gtid();
3876 if (gtid < 0) {
3877 return 0;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003878 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003879 thread = __kmp_thread_from_gtid(gtid);
3880 parent_task = thread->th.th_current_task->td_parent;
3881 return (parent_task == NULL ? 0 : parent_task->td_task_id);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003882
3883} // __kmpc_get_parent_taskid
3884
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003885#if OMP_45_ENABLED
Jonathan Peyton71909c52016-03-02 22:42:06 +00003886/*!
3887@ingroup WORK_SHARING
3888@param loc source location information.
3889@param gtid global thread number.
3890@param num_dims number of associated doacross loops.
3891@param dims info on loops bounds.
3892
3893Initialize doacross loop information.
3894Expect compiler send us inclusive bounds,
3895e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
3896*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  // Layout of th_doacross_info:
  //   [0]           = num_dims
  //   [1]           = address of sh_buf->doacross_num_done
  //   [2..4]        = lo/up/st of dims[0]
  //   [4*j+1..4*j+4]= range_length/lo/up/st of dims[j] for j >= 1
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by other loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB(); // ensure the zeroed flags are visible before publishing them
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
4016
// Block until the iteration identified by the vector `vec` (one index per
// doacross dimension) has been posted by __kmpc_doacross_post. The iteration
// vector is linearized into a single sequential iteration number, and one bit
// per iteration in the shared flags array signals completion.
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  // (an out-of-bounds dependence vector means there is nothing to wait for)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  // Fold the remaining dimensions in using the per-dimension quadruples
  // (range_length, lo, up, st) saved by __kmpc_doacross_init.
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  // Locate the iteration's bit: word index = iter_number / 32, bit = % 32.
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  // Spin until the producer sets the bit via __kmpc_doacross_post.
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB(); // acquire-style fence: don't read protected data before the flag
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
4109
// Mark the iteration identified by the vector `vec` as finished so that
// threads blocked in __kmpc_doacross_wait on it may proceed. Uses the same
// linearization of the iteration vector as __kmpc_doacross_wait.
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  // Locate the iteration's bit: word index = iter_number / 32, bit = % 32.
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB(); // release-style fence: finish iteration's stores before posting
  // Avoid the atomic RMW when the bit is already set (benign data race on
  // the read; the atomic OR below is the authoritative update).
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
4163
// Tear down doacross state for the current loop. Every thread drops its
// private info buffer; the last thread to arrive additionally frees the
// shared flags array and recycles the shared dispatch buffer.
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  // th_doacross_info[1] holds the address of the shared num_done counter
  // (stored by __kmpc_doacross_init); atomically count this thread as done.
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
4197#endif
4198
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004199#if OMP_50_ENABLED
Andrey Churbanov2d91a8a2018-03-22 18:51:51 +00004200int __kmpc_get_target_offload(void) {
4201 if (!__kmp_init_serial) {
4202 __kmp_serial_initialize();
4203 }
4204 return __kmp_target_offload;
4205}
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00004206
4207int __kmpc_pause_resource(kmp_pause_status_t level) {
4208 if (!__kmp_init_serial) {
4209 return 1; // Can't pause if runtime is not initialized
4210 }
4211 return __kmp_pause_resource(level);
4212}
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004213#endif // OMP_50_ENABLED
4214
Jim Cownie5e8470a2013-09-27 10:38:44 +00004215// end of file //