/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
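 *
 * A minimal sketch of the bracketing calls a compiler might emit around a
 * program (the ident_t argument is assumed to be a compiler-generated
 * source-location record):
 * @code
 * __kmpc_begin(&loc, 0);
 * // ... user code ...
 * __kmpc_end(&loc);
 * @endcode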
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called it
 * will not do anything unless the `KMP_IGNORE_MPPEND` environment variable is
 * set to zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes the __kmpc_end()
  // call a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
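
A usage sketch (the loc argument is only used for tracing here and is assumed
to be a compiler-generated source-location record):
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
@endcode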
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, since the runtime maintains
underlying threads even when they are not active (because the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
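
A sketch of the call sequence a compiler might emit for
`#pragma omp parallel num_threads(4)` (outlined_fn stands for a hypothetical
outlined parallel-region body):
@code
__kmpc_push_num_threads(&loc, gtid, 4);
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
@endcode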
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
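
A sketch of how a compiler might lower a parallel region with one shared
variable; the microtask receives the global and bound thread ids followed by
pointers to the shared variables (outlined and caller are hypothetical names):
@code
static void outlined(kmp_int32 *gtid, kmp_int32 *btid, float *x) {
  // body of #pragma omp parallel
}

void caller(ident_t *loc_p) {
  float x = 0.0f;
  __kmpc_fork_call(loc_p, 1, (kmpc_micro)outlined, &x);
}
@endcode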
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
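
A sketch of the sequence a compiler might emit for
`#pragma omp teams num_teams(8) thread_limit(4)` (teams_outlined stands for a
hypothetical outlined teams body):
@code
__kmpc_push_num_teams(&loc, gtid, 8, 4);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined);
@endcode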
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset/clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
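
A usage sketch of the lowering of a `#pragma omp flush` directive:
@code
__kmpc_flush(&loc);
@endcode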
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since we use volatile instead in the library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is an SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
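
A usage sketch of the lowering of a stand-alone `#pragma omp barrier`:
@code
__kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
@endcode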
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
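
A sketch of how a compiler might guard a <tt>master</tt> region with this pair
of calls:
@code
if (__kmpc_master(&loc, gtid)) {
  // body of #pragma omp master
  __kmpc_end_master(&loc, gtid);
}
@endcode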
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
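
A sketch of the bracketing calls a compiler might emit around the body of an
<tt>ordered</tt> region inside an ordered loop:
@code
__kmpc_ordered(&loc, gtid);
// body of #pragma omp ordered
__kmpc_end_ordered(&loc, gtid);
@endcode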
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (                                                                  \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

Jonathan Peyton30419822017-05-12 18:01:32 +00001094static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1095 ident_t const *loc,
1096 kmp_int32 gtid) {
1097 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001098
Jonathan Peyton30419822017-05-12 18:01:32 +00001099 // Because of the double-check, the following load doesn't need to be volatile
1100 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001101
Jonathan Peyton30419822017-05-12 18:01:32 +00001102 if (lck == NULL) {
1103 void *idx;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001104
Jonathan Peyton30419822017-05-12 18:01:32 +00001105 // Allocate & initialize the lock.
1106 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1107 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1108 __kmp_init_user_lock_with_checks(lck);
1109 __kmp_set_user_lock_location(lck, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001110#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001111 __kmp_itt_critical_creating(lck);
1112// __kmp_itt_critical_creating() should be called *before* the first usage
1113// of underlying lock. It is the only place where we can guarantee it. There
1114// are chances the lock will destroyed with no usage, but it is not a
1115// problem, because this is not real event seen by user but rather setting
1116// name for object (lock). See more details in kmp_itt.h.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001117#endif /* USE_ITT_BUILD */
1118
Jonathan Peyton30419822017-05-12 18:01:32 +00001119 // Use a cmpxchg instruction to slam the start of the critical section with
1120 // the lock pointer. If another thread beat us to it, deallocate the lock,
1121 // and use the lock that the other thread allocated.
1122 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001123
Jonathan Peyton30419822017-05-12 18:01:32 +00001124 if (status == 0) {
1125// Deallocate the lock and reload the value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001126#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001127 __kmp_itt_critical_destroyed(lck);
1128// Let ITT know the lock is destroyed and the same memory location may be reused
1129// for another purpose.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001130#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001131 __kmp_destroy_user_lock_with_checks(lck);
1132 __kmp_user_lock_free(&idx, gtid, lck);
1133 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1134 KMP_DEBUG_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001135 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001136 }
1137 return lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001138}
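
// Illustrative sketch (not part of the runtime): the allocation above is a
// double-checked pattern built on compare-and-swap. With hypothetical names,
// the shape is:
//
//   lck = load(slot);                  // cheap check: already allocated?
//   if (lck == NULL) {
//     lck = allocate_and_init();
//     if (!CAS(slot, NULL, lck)) {     // another thread won the race
//       destroy_and_free(lck);
//       lck = load(slot);              // adopt the winner's lock
//     }
//   }
//
// Every loser frees its own allocation, so exactly one lock survives per
// critical-section name no matter how many threads race the first entry.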

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

// Since the critical directive binds to all threads, not just the current
// team, we have to check this even if we are in a serialized team.
// Also, even if we are the uber thread, we still have to acquire the lock,
// as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}
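
// Usage sketch (illustrative, not tied to any particular compiler): a
// directive such as
//
//   #pragma omp critical (name)
//   { body(); }
//
// is typically lowered to paired runtime calls, where crit_name is a
// hypothetical compiler-generated kmp_critical_name backing "name":
//
//   __kmpc_critical(&loc, global_tid, &crit_name);
//   body();
//   __kmpc_end_critical(&loc, global_tid, &crit_name);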

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
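
// Worked examples of the mapping above (assuming a TSX-capable build, so
// KMP_TSX_LOCK(seq) expands to lockseq_##seq):
//   omp_lock_hint_contended                            -> lockseq_queuing
//   omp_lock_hint_uncontended                          -> lockseq_tas
//   omp_lock_hint_speculative                          -> lockseq_hle
//   omp_lock_hint_contended | omp_lock_hint_uncontended
//                                  -> __kmp_user_lock_seq (conflicting hints)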

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint
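
// Usage sketch (illustrative): OpenMP 4.5 allows a hint clause on critical,
//
//   #pragma omp critical (name) hint(omp_lock_hint_speculative)
//   { body(); }
//
// which a compiler would typically lower to
//
//   __kmpc_critical_with_hint(&loc, global_tid, &crit_name,
//                             omp_lock_hint_speculative);
//   body();
//   __kmpc_end_critical(&loc, global_tid, &crit_name);
//
// with crit_name the same compiler-generated kmp_critical_name used by the
// unhinted form.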

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
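
// Usage sketch (illustrative): compiler-generated code for the combined
// construct has the shape
//
//   if (__kmpc_barrier_master(&loc, global_tid)) {
//     master_only_work(); // hypothetical master-only body
//     __kmpc_end_barrier_master(&loc, global_tid);
//   }
//
// All threads enter __kmpc_barrier_master and gather at the barrier; only the
// master returns 1, and its later __kmpc_end_barrier_master call releases the
// threads still waiting.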

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the nowait clause means no thread
waits for the master block to complete, so no releasing barrier is required.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */

    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */

      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}
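
// Usage sketch (illustrative): the nowait variant needs no matching "end"
// call because no thread waits for the master block to complete:
//
//   if (__kmpc_barrier_master_nowait(&loc, global_tid)) {
//     master_only_work(); // other threads have already moved on
//   }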

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls, rather the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
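
// Usage sketch (illustrative): a single construct
//
//   #pragma omp single
//   { body(); }
//
// is typically lowered to
//
//   if (__kmpc_single(&loc, global_tid)) {
//     body();
//     __kmpc_end_single(&loc, global_tid);
//   }
//   __kmpc_barrier(&loc, global_tid); // omitted when nowait is present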

/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use the default set above;
        // a warning about this case is provided in __kmpc_for_static_init
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}
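
// Usage sketch (illustrative): a statically scheduled loop brackets its chunk
// computation with init/fini calls; for a 32-bit signed induction variable:
//
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last,
//                            &lower, &upper, &stride, incr, chunk);
//   for (i = lower; i <= upper; i += incr)
//     body(i);
//   __kmpc_for_static_fini(&loc, gtid);
//
// The __kmpc_for_static_init_{4,4u,8,8u} entry point is chosen to match the
// induction variable's type.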

// User routines which take C-style arguments (call by value)
// different from the Fortran equivalent routines

void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__nested(thread, flag ? TRUE : FALSE);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* TO DO */
  /* we want per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}

#if OMP_50_ENABLED
/* OpenMP 5.0 Affinity Format API */

void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
#endif /* OMP_50_ENABLED */
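
// Usage sketch (illustrative): these entry points back the OpenMP 5.0
// omp_{set,get,display,capture}_affinity_format API, e.g.
//
//   ompc_set_affinity_format("host=%H tid=%n");  // %H host, %n thread_num
//   char buf[256];
//   size_t n = ompc_capture_affinity(buf, sizeof(buf), NULL);
//
// The return value is the size the full string requires, so a value larger
// than the buffer signals truncation. Passing NULL as the format falls back
// to the installed default (an assumption consistent with the behavior
// specified for omp_capture_affinity).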

void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
  // tasks can overwrite the location)
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
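
// Illustrative cpy_func sketch: for a copyprivate(x) clause on an int x, a
// compiler might pass &x as cpy_data and emit a helper such as
//
//   static void copy_x(void *destination, void *source) {
//     *(int *)destination = *(int *)source; // broadcast the single
//                                           // thread's value of x
//   }
//
// Every thread then calls
//
//   __kmpc_copyprivate(&loc, gtid, sizeof(int), &x, copy_x, didit);
//
// with didit == 1 only on the thread that executed the single block.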

/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

Jonathan Peytonb87b5812015-12-11 22:04:05 +00002141#if KMP_USE_DYNAMIC_LOCK
2142
2143// internal lock initializer
Jonathan Peyton30419822017-05-12 18:01:32 +00002144static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2145 kmp_dyna_lockseq_t seq) {
2146 if (KMP_IS_D_LOCK(seq)) {
2147 KMP_INIT_D_LOCK(lock, seq);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002148#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002149 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002150#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002151 } else {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002152 KMP_INIT_I_LOCK(lock, seq);
2153#if USE_ITT_BUILD
2154 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2155 __kmp_itt_lock_creating(ilk->lock, loc);
2156#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002157 }
2158}
2159
2160// internal nest lock initializer
2161static __forceinline void
2162__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2163 kmp_dyna_lockseq_t seq) {
2164#if KMP_USE_TSX
2165 // Don't have nested lock implementation for speculative locks
2166 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2167 seq = __kmp_user_lock_seq;
2168#endif
2169 switch (seq) {
2170 case lockseq_tas:
2171 seq = lockseq_nested_tas;
2172 break;
2173#if KMP_USE_FUTEX
2174 case lockseq_futex:
2175 seq = lockseq_nested_futex;
2176 break;
2177#endif
2178 case lockseq_ticket:
2179 seq = lockseq_nested_ticket;
2180 break;
2181 case lockseq_queuing:
2182 seq = lockseq_nested_queuing;
2183 break;
2184 case lockseq_drdpa:
2185 seq = lockseq_nested_drdpa;
2186 break;
2187 default:
2188 seq = lockseq_nested_queuing;
2189 }
2190 KMP_INIT_I_LOCK(lock, seq);
2191#if USE_ITT_BUILD
2192 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2193 __kmp_itt_lock_creating(ilk->lock, loc);
2194#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002195}
2196
2197/* initialize the lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00002198void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2199 uintptr_t hint) {
2200 KMP_DEBUG_ASSERT(__kmp_init_serial);
2201 if (__kmp_env_consistency_check && user_lock == NULL) {
2202 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2203 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002204
Jonathan Peyton30419822017-05-12 18:01:32 +00002205 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Joachim Protze82e94a52017-11-01 10:08:30 +00002206
2207#if OMPT_SUPPORT && OMPT_OPTIONAL
2208 // This is the case, if called from omp_init_lock_with_hint:
2209 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2210 if (!codeptr)
2211 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2212 if (ompt_enabled.ompt_callback_lock_init) {
2213 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2214 ompt_mutex_lock, (omp_lock_hint_t)hint,
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002215 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002216 codeptr);
2217 }
2218#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002219}
2220
2221/* initialize the lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00002222void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2223 void **user_lock, uintptr_t hint) {
2224 KMP_DEBUG_ASSERT(__kmp_init_serial);
2225 if (__kmp_env_consistency_check && user_lock == NULL) {
2226 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2227 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002228
Jonathan Peyton30419822017-05-12 18:01:32 +00002229 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Joachim Protze82e94a52017-11-01 10:08:30 +00002230
2231#if OMPT_SUPPORT && OMPT_OPTIONAL
2232 // This is the case, if called from omp_init_lock_with_hint:
2233 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2234 if (!codeptr)
2235 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2236 if (ompt_enabled.ompt_callback_lock_init) {
2237 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2238 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002239 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002240 codeptr);
2241 }
2242#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002243}

#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
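
// Illustrative only: a complete lock lifecycle that exercises
// __kmpc_init_lock, __kmpc_set_lock, __kmpc_unset_lock and
// __kmpc_destroy_lock through the standard API (a sketch, assuming libomp):
//
//   #include <omp.h>
//   #include <stdio.h>
//
//   int main(void) {
//     omp_lock_t l;
//     int sum = 0;
//     omp_init_lock(&l);
//   #pragma omp parallel num_threads(4)
//     {
//       omp_set_lock(&l); // serializes the update below
//       sum += 1;
//       omp_unset_lock(&l);
//     }
//     omp_destroy_lock(&l);
//     printf("sum = %d\n", sum); // prints sum = 4 when four threads run
//     return 0;
//   }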

/* initialize the nest lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock
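
// Illustrative only: a nestable lock may be re-acquired by the thread that
// already owns it, which is what the depth_locked field sized above makes
// room for. Sketch (assuming libomp; nl initialized via omp_init_nest_lock):
//
//   omp_nest_lock_t nl;
//
//   void helper(void) {
//     omp_set_nest_lock(&nl); // owner re-acquires: depth 1 -> 2, no deadlock
//     // ...
//     omp_unset_nest_lock(&nl); // depth 2 -> 1
//   }
//
//   void worker(void) {
//     omp_set_nest_lock(&nl); // first acquisition: depth 0 -> 1
//     helper();
//     omp_unset_nest_lock(&nl); // depth 1 -> 0, lock actually released
//   }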

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the nest lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_destroy_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
          codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_set_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_unset_lock:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      // Note: lck is not set on this fast path; report the user lock pointer.
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
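
// Illustrative only: the Linux/x86 "fast" release path above is just a plain
// store of 0 to the poll word followed by a full memory barrier. A
// self-contained C11 analogue of that test-and-set protocol (a sketch, not
// the runtime's actual kmp_tas_lock_t):
//
//   #include <stdatomic.h>
//
//   typedef struct { atomic_int poll; } tas_lock_t; // 0 = free, else held
//
//   static void tas_acquire(tas_lock_t *l, int gtid) {
//     int expected = 0;
//     // Spin until the CAS installs gtid+1 into a free (0) poll word.
//     while (!atomic_compare_exchange_weak(&l->poll, &expected, gtid + 1))
//       expected = 0;
//   }
//
//   static void tas_release(tas_lock_t *l) {
//     atomic_store(&l->poll, 0); // poll word back to "free"
//   }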

/* release the nest lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_unset_nest_lock:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      // Note: lck is not set on this fast path; report the user lock pointer.
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_unset_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
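
// Illustrative only: omp_test_lock() lands here and, unlike omp_set_lock(),
// never blocks, so a caller can overlap other work while the lock is busy.
// Sketch (do_other_work is a hypothetical helper):
//
//   while (!omp_test_lock(&l)) {
//     do_other_work(); // lock busy; make progress elsewhere
//   }
//   // ... critical section ...
//   omp_unset_lock(&l);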

/* try to acquire the nest lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_test_nest_lock:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
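
// Illustrative only: omp_test_nest_lock() returns the new nesting count on
// success (1 for a first acquisition, >1 when the owner re-acquires) and 0 on
// failure, which is why rc == 1 selects the mutex_acquired callback above
// while larger values report a nest_lock scope event. E.g.:
//
//   int depth = omp_test_nest_lock(&nl);
//   if (depth) { // depth == 1: first acquire; depth > 1: recursive acquire
//     // ... critical section ...
//     omp_unset_nest_lock(&nl);
//   }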

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid) \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
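
// Illustrative only: the two macros simply stash and fetch the chosen method
// in the current thread's descriptor so that the matching __kmpc_end_reduce*
// call can reuse it without recomputing, roughly:
//
//   __KMP_SET_REDUCTION_METHOD(gtid, method); // in __kmpc_reduce*
//   ...
//   PACKED_REDUCTION_METHOD_T m = __KMP_GET_REDUCTION_METHOD(gtid); // at end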

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
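
// Illustrative only: when the critical_reduce_block method is selected, the
// two helpers above bracket the compiler-emitted combine step, roughly:
//
//   __kmp_enter_critical_section_reduce_block(loc, global_tid, crit);
//   reduce_func(shared_lhs_data, private_rhs_data); // fold one partial result
//   __kmp_end_critical_section_reduce_block(loc, global_tid, crit);
//
// i.e. every thread folds its private copy into the shared result inside one
// runtime-internal critical section.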

#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
#endif
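
// Illustrative only: the two helpers above are used as a bracket around a
// reduction that happens at teams level, as __kmpc_reduce_nowait() below
// does, roughly:
//
//   teams_swapped =
//       __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
//   // ... reduce against the parent team while swapped ...
//   if (teams_swapped)
//     __kmp_restore_swapped_teams(th, team, task_state);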
3350
Jim Cownie5e8470a2013-09-27 10:38:44 +00003351/* 2.a.i. Reduce Block without a terminating barrier */
3352/*!
3353@ingroup SYNCHRONIZATION
3354@param loc source location information
3355@param global_tid global thread number
3356@param num_vars number of items (variables) to be reduced
3357@param reduce_size size of data in bytes to be reduced
3358@param reduce_data pointer to data to be reduced
Jonathan Peyton30419822017-05-12 18:01:32 +00003359@param reduce_func callback function providing reduction operation on two
3360operands and returning result of reduction in lhs_data
Jim Cownie5e8470a2013-09-27 10:38:44 +00003361@param lck pointer to the unique lock data structure
Jonathan Peyton30419822017-05-12 18:01:32 +00003362@result 1 for the master thread, 0 for all other team threads, 2 for all team
3363threads if atomic reduction needed
Jim Cownie5e8470a2013-09-27 10:38:44 +00003364
3365The nowait version is used for a reduce clause with the nowait argument.
3366*/
3367kmp_int32
Jonathan Peyton30419822017-05-12 18:01:32 +00003368__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3369 size_t reduce_size, void *reduce_data,
3370 void (*reduce_func)(void *lhs_data, void *rhs_data),
3371 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003372
Jonathan Peyton30419822017-05-12 18:01:32 +00003373 KMP_COUNT_BLOCK(REDUCE_nowait);
3374 int retval = 0;
3375 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003376#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003377 kmp_info_t *th;
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003378 kmp_team_t *team;
Jonathan Peyton30419822017-05-12 18:01:32 +00003379 int teams_swapped = 0, task_state;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003380#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003381 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003382
  // Why do we need this initialization here at all?
  // A reduction clause cannot be used as a stand-alone directive.

  // Do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed.
  // Possible detection of a false-positive race by the thread checker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // The packed_reduction_method value will be reused by the __kmp_end_reduce*
  // function, so the value must be kept in a variable. The variable should be
  // a construct-specific or thread-specific property, not a team-specific one
  // (a thread can reach the next reduce block on the next construct, and the
  // reduce method may differ on that construct). An ident_t "loc" parameter
  // could be used as a construct-specific property (but what if loc == 0?),
  // and if either a construct-specific or a team-specific variable were
  // shared, unnecessary extra syncs would be needed. A thread-specific
  // variable avoids both issues (next construct and extra syncs), so the
  // thread-specific "th_local.reduction_method" variable is used currently.
  // Each thread executes the 'determine' and 'set' lines itself (no need to
  // have a single thread do it, which would require extra syncs).

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // All threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen). This is not quite clean: the checking
    // block is closed by this 'pop' while the atomic operation has not been
    // executed yet; it will execute slightly later, literally on the next
    // instruction.
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if the master goes slowly, other threads are blocked here waiting for
// the master to come and release them)
// AT: (that's not what a customer might expect when specifying the NOWAIT
// clause)
// AT: (specifying NOWAIT won't improve performance, which will be confusing
// to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other
// design) might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: run the EPCC test and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code; it's used for
// an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary
    // frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // All workers except the master should do this pop here (none of the
    // other workers will get to __kmpc_end_reduce_nowait()).
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

  } else if (packed_reduction_method == atomic_reduce_block) {

    // Neither master nor other workers should get here (the code gen does not
    // generate this call in case 2: atomic reduce block). Actually it would
    // be better to remove this else-if entirely; after removal this value
    // would be caught by the 'else' branch and trigger the assert.

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
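
// Illustration only, not compiled: a simplified sketch of the code a compiler
// might emit for a reduction(+:sum) nowait clause against the pair of entry
// points above. All names here (reduce_sum_func, emitted_region_tail, sum,
// sum_priv, loc, gtid, crit_name) are hypothetical stand-ins for
// compiler-generated entities; real codegen is compiler-specific.
#if 0
static void reduce_sum_func(void *lhs_data, void *rhs_data) {
  *(double *)lhs_data += *(double *)rhs_data; // combine two partial sums
}

static void emitted_region_tail(ident_t *loc, kmp_int32 gtid, double *sum,
                                double sum_priv) {
  static kmp_critical_name crit_name = {0};
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(double),
                               &sum_priv, reduce_sum_func, &crit_name)) {
  case 1: // this thread combines into the shared variable, then ends
    *sum += sum_priv;
    __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
    break;
  case 2: // atomic method: combine atomically; no end call is generated
    /* atomic */ *sum += sum_priv; // emitted as an atomic add in real codegen
    break;
  default: // 0: the tree barrier already combined this thread's data
    break;
  }
}
#endif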

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // Why do we need this initialization here at all?
  // A reduction clause cannot be a stand-alone directive.

  // Do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed.
  // Possible detection of a false-positive race by the thread checker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // All workers except the master should do this pop here (none of the
    // other workers will enter __kmpc_end_reduce()).
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size==1, no synchronization is required (Intel platforms only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
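
// Illustration only, not compiled: for the blocking form the emitted pattern
// is the same as in the nowait sketch above, except that __kmpc_end_reduce()
// is also called on the atomic path so that every thread reaches the
// terminating barrier (again with hypothetical names; codegen is
// compiler-specific).
#if 0
  switch (__kmpc_reduce(loc, gtid, 1, sizeof(double), &sum_priv,
                        reduce_sum_func, &crit_name)) {
  case 1:
    *sum += sum_priv;
    __kmpc_end_reduce(loc, gtid, &crit_name);
    break;
  case 2:
    /* atomic */ *sum += sum_priv;
    __kmpc_end_reduce(loc, gtid, &crit_name); // provides the implicit barrier
    break;
  default: // 0: workers are released by the master from the tree barrier
    break;
  }
#endif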

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
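
// Illustration only, not compiled: a sketch of using the two queries above
// for debug tracing from application code. The "trace_current_task" wrapper
// is hypothetical; both entry points return 0 when called from a thread
// unknown to the runtime.
#if 0
#include <stdio.h>
extern "C" kmp_uint64 __kmpc_get_taskid();
extern "C" kmp_uint64 __kmpc_get_parent_taskid();

static void trace_current_task(void) {
  printf("task %llu (parent %llu)\n",
         (unsigned long long)__kmpc_get_taskid(),
         (unsigned long long)__kmpc_get_parent_taskid());
}
#endif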

#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loops bounds.

Initialize doacross loop information.
The compiler is expected to send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
                                       // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check that the shared buffer is not still occupied by a previous loop
  // (i.e. by the loop that used buffer index idx - __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
                              // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
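
/* A reading aid derived from the code above: layout of the private
   th_doacross_info buffer. Entry [0] holds num_dims, entry [1] holds the
   address of sh_buf->doacross_num_done, entries [2..4] hold lo/up/st of
   dimension 0, and every further dimension j >= 1 occupies the four entries
   starting at 4*j+1: range_length, lo, up, st. This is why the wait/post
   routines below read dimension i at offsets i*4+1 .. i*4+4. */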

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
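
// A reading aid (hypothetical helper, not used by the runtime): the flag
// addressing above packs one "posted" bit per linearized iteration, 32 bits
// per kmp_uint32 word, which is what the shift/mask arithmetic computes.
#if 0
static inline int __kmp_doacross_iter_posted(const kmp_uint32 *flags,
                                             kmp_int64 iter_number) {
  return (flags[iter_number >> 5] >> (iter_number % 32)) & 1;
}
#endif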

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
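
// Illustration only, not compiled: a sketch of how a compiler might lower a
// one-dimensional "#pragma omp for ordered(1)" loop with depend(sink)/
// depend(source) clauses onto the four entry points above. All names (loc,
// gtid, a, n, emitted_doacross_loop) are hypothetical; real codegen is
// compiler-specific and would only iterate this thread's chunk.
#if 0
static void emitted_doacross_loop(ident_t *loc, int gtid, double *a, int n) {
  struct kmp_dim dim;
  dim.lo = 1; // inclusive bounds, as documented for __kmpc_doacross_init()
  dim.up = n - 1;
  dim.st = 1;
  __kmpc_doacross_init(loc, gtid, 1, &dim);
  for (int i = 1; i < n; ++i) {
    kmp_int64 vec = i - 1;
    __kmpc_doacross_wait(loc, gtid, &vec); // ordered depend(sink: i-1)
    a[i] = a[i - 1] + 1.0; // loop body with a cross-iteration dependence
    vec = i;
    __kmpc_doacross_post(loc, gtid, &vec); // ordered depend(source)
  }
  __kmpc_doacross_fini(loc, gtid);
}
#endif
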
4184#endif
4185
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004186#if OMP_50_ENABLED
Andrey Churbanov2d91a8a2018-03-22 18:51:51 +00004187int __kmpc_get_target_offload(void) {
4188 if (!__kmp_init_serial) {
4189 __kmp_serial_initialize();
4190 }
4191 return __kmp_target_offload;
4192}
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00004193
4194int __kmpc_pause_resource(kmp_pause_status_t level) {
4195 if (!__kmp_init_serial) {
4196 return 1; // Can't pause if runtime is not initialized
4197 }
4198 return __kmp_pause_resource(level);
4199}
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004200#endif // OMP_50_ENABLED
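
// Illustration only, not compiled: the same pausing mechanism is reachable
// from user code through the OpenMP 5.0 omp_pause_resource() /
// omp_pause_resource_all() API (a sketch of assumed typical usage with the
// standard omp.h declarations, not part of this file).
#if 0
#include <omp.h>
void quiet_phase(void) {
  // Soft pause: threads and their resources are released, runtime state is
  // kept; the runtime resumes automatically on the next parallel construct.
  int rc = omp_pause_resource_all(omp_pause_soft);
  (void)rc; // 0 on success
}
#endif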

// end of file //