/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions
/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called it
 * will not do anything unless the `KMP_IGNORE_MPPEND` environment variable is
 * set to zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
  // root (it can cause library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active
parallel construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE (in release
 * builds; debug builds may return FALSE under the KMP_PAR_RANGE filter).
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  // loc->psource is a ';'-separated string of the form ";file;routine;line;..."
  // -- walk its fields to apply the KMP_PAR_RANGE filter.
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
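
For illustration only, a sketch of how a compiler might lower a parallel
construct with a num_threads clause (the outlined routine outlined_fn is
hypothetical, not actual compiler output):
@code
// #pragma omp parallel num_threads(4)
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_threads(&loc, gtid, 4);
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
@endcode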
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to the callback routine containing the outlined
parallel construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
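
As an illustration only (the names are hypothetical and the exact lowering is
compiler-dependent), a region such as
@code
#pragma omp parallel shared(n)
  { body(&n); }
@endcode
might be outlined and forked as
@code
void outlined_fn(kmp_int32 *gtid, kmp_int32 *tid, int *n) { body(n); }
// ...
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &n);
@endcode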
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
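
For illustration only, a sketch of a possible lowering of a teams construct
with both clauses (the routine teams_outlined is hypothetical, not actual
compiler output):
@code
// #pragma omp teams num_teams(2) thread_limit(8)
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_teams(&loc, gtid, 2, 8);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined);
@endcode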
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to the callback routine containing the outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
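
Illustrative only: the teams microtask has the same shape as a parallel
microtask (names here are hypothetical; the actual lowering is
compiler-dependent):
@code
void teams_outlined(kmp_int32 *gtid, kmp_int32 *tid, double *data);
// ...
__kmpc_fork_teams(&loc, 1, (kmpc_micro)teams_outlined, &data);
@endcode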
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? omp_state_work_serial
                                           : omp_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
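
Illustrative only: a compiler would typically emit a call such as the
following for a <tt>flush</tt> directive (not actual compiler output):
@code
// #pragma omp flush
__kmpc_flush(&loc);
@endcode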
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  // while (!flag) {
  //   #pragma omp flush(flag)
  // }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
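
Illustrative only: an explicit barrier might be lowered as follows
(hypothetical, compiler-dependent):
@code
// #pragma omp barrier
__kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
@endcode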
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  omp_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
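
A sketch of the expected call pairing (illustrative, not actual compiler
output): only the thread for which __kmpc_master() returns 1 executes the
block and calls __kmpc_end_master().
@code
if (__kmpc_master(&loc, gtid)) {
  // ... master-only code ...
  __kmpc_end_master(&loc, gtid);
}
@endcode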
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
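
Illustrative only: inside a loop with an <tt>ordered</tt> clause, the
construct is bracketed by a call pair (hypothetical lowering):
@code
// #pragma omp ordered
__kmpc_ordered(&loc, gtid);
// ... ordered code ...
__kmpc_end_ordered(&loc, gtid);
@endcode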
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  omp_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = omp_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (omp_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = omp_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (                                                                  \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by the user but rather
// setting a name for an object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
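
A sketch of the expected call pairing (illustrative only; the lock variable
name is hypothetical, and __kmpc_end_critical() releases the same lock):
@code
static kmp_critical_name crit_lock; // zero-initialized, shared by all threads
__kmpc_critical(&loc, gtid, &crit_lock);
// ... critical section ...
__kmpc_end_critical(&loc, gtid, &crit_lock);
@endcode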
1128*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001129void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1130 kmp_critical_name *crit) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001131#if KMP_USE_DYNAMIC_LOCK
Joachim Protze82e94a52017-11-01 10:08:30 +00001132#if OMPT_SUPPORT && OMPT_OPTIONAL
1133 OMPT_STORE_RETURN_ADDRESS(global_tid);
1134#endif // OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001135 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001136#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001137 KMP_COUNT_BLOCK(OMP_CRITICAL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001138#if OMPT_SUPPORT && OMPT_OPTIONAL
1139 omp_state_t prev_state = omp_state_undefined;
1140 ompt_thread_info_t ti;
1141#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001142 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001143
Jonathan Peyton30419822017-05-12 18:01:32 +00001144 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001145
Jonathan Peyton30419822017-05-12 18:01:32 +00001146 // TODO: add THR_OVHD_STATE
Jim Cownie5e8470a2013-09-27 10:38:44 +00001147
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001148 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
Jonathan Peyton30419822017-05-12 18:01:32 +00001149 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001150
Jonathan Peyton30419822017-05-12 18:01:32 +00001151 if ((__kmp_user_lock_kind == lk_tas) &&
1152 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1153 lck = (kmp_user_lock_p)crit;
1154 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001155#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001156 else if ((__kmp_user_lock_kind == lk_futex) &&
1157 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1158 lck = (kmp_user_lock_p)crit;
1159 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001160#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001161 else { // ticket, queuing or drdpa
1162 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1163 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001164
Jonathan Peyton30419822017-05-12 18:01:32 +00001165 if (__kmp_env_consistency_check)
1166 __kmp_push_sync(global_tid, ct_critical, loc, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001167
Jonathan Peyton30419822017-05-12 18:01:32 +00001168// Since the critical directive binds to all threads, not just the current
1169// team, we have to check this even if we are in a serialized team.
1170// Also, even if we are the uber thread, we still have to acquire the lock,
1171// as we have to contend with sibling threads.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001172
1173#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001174 __kmp_itt_critical_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001175#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001176#if OMPT_SUPPORT && OMPT_OPTIONAL
1177  OMPT_STORE_RETURN_ADDRESS(global_tid);
1178 void *codeptr_ra = NULL;
1179 if (ompt_enabled.enabled) {
1180 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1181 /* OMPT state update */
1182 prev_state = ti.state;
Joachim Protze40636132018-05-28 08:16:08 +00001183 ti.wait_id = (omp_wait_id_t)lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001184 ti.state = omp_state_wait_critical;
1185
1186 /* OMPT event callback */
1187    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1188 if (ompt_enabled.ompt_callback_mutex_acquire) {
1189 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1190 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze40636132018-05-28 08:16:08 +00001191 (omp_wait_id_t)crit, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001192 }
1193 }
1194#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001195  // The value of 'crit' should be valid for use as the critical_id of the
1196  // critical section directive.
1197 __kmp_acquire_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001198
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001199#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001200 __kmp_itt_critical_acquired(lck);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001201#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001202#if OMPT_SUPPORT && OMPT_OPTIONAL
1203 if (ompt_enabled.enabled) {
1204 /* OMPT state update */
1205 ti.state = prev_state;
1206 ti.wait_id = 0;
1207
1208 /* OMPT event callback */
1209 if (ompt_enabled.ompt_callback_mutex_acquired) {
1210 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00001211 ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
Joachim Protze82e94a52017-11-01 10:08:30 +00001212 }
1213 }
1214#endif
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001215 KMP_POP_PARTITIONED_TIMER();
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001216
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001217 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
Jonathan Peyton30419822017-05-12 18:01:32 +00001218 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001219#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001220}
1221
1222#if KMP_USE_DYNAMIC_LOCK
1223
1224// Converts the given hint to an internal lock implementation
Jonathan Peyton30419822017-05-12 18:01:32 +00001225static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001226#if KMP_USE_TSX
Jonathan Peyton30419822017-05-12 18:01:32 +00001227#define KMP_TSX_LOCK(seq) lockseq_##seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001228#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001229#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001230#endif
Hal Finkel01bb2402016-03-27 13:24:09 +00001231
1232#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00001233#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
Hal Finkel01bb2402016-03-27 13:24:09 +00001234#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001235#define KMP_CPUINFO_RTM 0
Hal Finkel01bb2402016-03-27 13:24:09 +00001236#endif
1237
Jonathan Peyton30419822017-05-12 18:01:32 +00001238 // Hints that do not require further logic
1239 if (hint & kmp_lock_hint_hle)
1240 return KMP_TSX_LOCK(hle);
1241 if (hint & kmp_lock_hint_rtm)
1242 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1243 if (hint & kmp_lock_hint_adaptive)
1244 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001245
Jonathan Peyton30419822017-05-12 18:01:32 +00001246 // Rule out conflicting hints first by returning the default lock
1247 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001248 return __kmp_user_lock_seq;
Jonathan Peyton30419822017-05-12 18:01:32 +00001249 if ((hint & omp_lock_hint_speculative) &&
1250 (hint & omp_lock_hint_nonspeculative))
1251 return __kmp_user_lock_seq;
1252
1253 // Do not even consider speculation when it appears to be contended
1254 if (hint & omp_lock_hint_contended)
1255 return lockseq_queuing;
1256
1257 // Uncontended lock without speculation
1258 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1259 return lockseq_tas;
1260
1261 // HLE lock for speculation
1262 if (hint & omp_lock_hint_speculative)
1263 return KMP_TSX_LOCK(hle);
1264
1265 return __kmp_user_lock_seq;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001266}
1267
Joachim Protze82e94a52017-11-01 10:08:30 +00001268#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001269#if KMP_USE_DYNAMIC_LOCK
Joachim Protze1b2bd262018-01-17 10:06:01 +00001270static kmp_mutex_impl_t
Joachim Protze82e94a52017-11-01 10:08:30 +00001271__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1272 if (user_lock) {
1273 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1274 case 0:
1275 break;
1276#if KMP_USE_FUTEX
1277 case locktag_futex:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001278 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001279#endif
1280 case locktag_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001281 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001282#if KMP_USE_TSX
1283 case locktag_hle:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001284 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001285#endif
1286 default:
Joachim Protzee6269e32018-01-17 11:13:11 +00001287 return ompt_mutex_impl_unknown;
Joachim Protze82e94a52017-11-01 10:08:30 +00001288 }
1289 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1290 }
1291 KMP_ASSERT(ilock);
1292 switch (ilock->type) {
1293#if KMP_USE_TSX
1294 case locktag_adaptive:
1295 case locktag_rtm:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001296 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001297#endif
1298 case locktag_nested_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001299 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001300#if KMP_USE_FUTEX
1301 case locktag_nested_futex:
1302#endif
1303 case locktag_ticket:
1304 case locktag_queuing:
1305 case locktag_drdpa:
1306 case locktag_nested_ticket:
1307 case locktag_nested_queuing:
1308 case locktag_nested_drdpa:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001309 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001310 default:
Joachim Protzee6269e32018-01-17 11:13:11 +00001311 return ompt_mutex_impl_unknown;
Joachim Protze82e94a52017-11-01 10:08:30 +00001312 }
1313}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001314#else
Joachim Protze82e94a52017-11-01 10:08:30 +00001315// For locks without dynamic binding
Joachim Protze1b2bd262018-01-17 10:06:01 +00001316static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
Joachim Protze82e94a52017-11-01 10:08:30 +00001317 switch (__kmp_user_lock_kind) {
1318 case lk_tas:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001319 return kmp_mutex_impl_spin;
Joachim Protze82e94a52017-11-01 10:08:30 +00001320#if KMP_USE_FUTEX
1321 case lk_futex:
1322#endif
1323 case lk_ticket:
1324 case lk_queuing:
1325 case lk_drdpa:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001326 return kmp_mutex_impl_queuing;
Joachim Protze82e94a52017-11-01 10:08:30 +00001327#if KMP_USE_TSX
1328 case lk_hle:
1329 case lk_rtm:
1330 case lk_adaptive:
Joachim Protze1b2bd262018-01-17 10:06:01 +00001331 return kmp_mutex_impl_speculative;
Joachim Protze82e94a52017-11-01 10:08:30 +00001332#endif
1333 default:
Joachim Protzee6269e32018-01-17 11:13:11 +00001334 return ompt_mutex_impl_unknown;
Joachim Protze82e94a52017-11-01 10:08:30 +00001335 }
1336}
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001337#endif // KMP_USE_DYNAMIC_LOCK
1338#endif // OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze82e94a52017-11-01 10:08:30 +00001339
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001340/*!
1341@ingroup WORK_SHARING
1342@param loc source location information.
1343@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001344@param crit identity of the critical section. This could be a pointer to a lock
1345associated with the critical section, or some other suitably unique value.
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001346@param hint the lock hint.
1347
Jonathan Peyton30419822017-05-12 18:01:32 +00001348Enter code protected by a `critical` construct with a hint. The hint value is
1349used to suggest a lock implementation. This function blocks until the executing
1350thread can enter the critical section unless the hint suggests use of
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001351speculative execution and the hardware supports it.
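
For example, '#pragma omp critical (name) hint(omp_lock_hint_speculative)'
could be lowered roughly as below (a sketch; identifiers are illustrative):
@code
static kmp_critical_name name_lock; // zero-filled
__kmpc_critical_with_hint(loc, gtid, &name_lock, omp_lock_hint_speculative);
// ... code protected by the critical construct ...
__kmpc_end_critical(loc, gtid, &name_lock);
@endcode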
1352*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001353void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
Jonathan Peytona2f6eff2018-09-07 18:46:40 +00001354 kmp_critical_name *crit, uint32_t hint) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001355 KMP_COUNT_BLOCK(OMP_CRITICAL);
1356 kmp_user_lock_p lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001357#if OMPT_SUPPORT && OMPT_OPTIONAL
1358 omp_state_t prev_state = omp_state_undefined;
1359 ompt_thread_info_t ti;
1360 // This is the case, if called from __kmpc_critical:
1361  // This is the case if called from __kmpc_critical:
1362 if (!codeptr)
1363 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1364#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001365
Jonathan Peyton30419822017-05-12 18:01:32 +00001366 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001367
Jonathan Peyton30419822017-05-12 18:01:32 +00001368 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1369 // Check if it is initialized.
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001370 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
Jonathan Peyton30419822017-05-12 18:01:32 +00001371 if (*lk == 0) {
1372 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1373 if (KMP_IS_D_LOCK(lckseq)) {
1374 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1375 KMP_GET_D_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001376 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001377 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001378 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001379 }
1380 // Branch for accessing the actual lock object and set operation. This
1381 // branching is inevitable since this lock initialization does not follow the
1382 // normal dispatch path (lock table is not used).
1383 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1384 lck = (kmp_user_lock_p)lk;
1385 if (__kmp_env_consistency_check) {
1386 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1387 __kmp_map_hint_to_lock(hint));
1388 }
1389#if USE_ITT_BUILD
1390 __kmp_itt_critical_acquiring(lck);
1391#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001392#if OMPT_SUPPORT && OMPT_OPTIONAL
1393 if (ompt_enabled.enabled) {
1394 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1395 /* OMPT state update */
1396 prev_state = ti.state;
Joachim Protze40636132018-05-28 08:16:08 +00001397 ti.wait_id = (omp_wait_id_t)lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001398 ti.state = omp_state_wait_critical;
1399
1400 /* OMPT event callback */
1401 if (ompt_enabled.ompt_callback_mutex_acquire) {
1402 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1403 ompt_mutex_critical, (unsigned int)hint,
Joachim Protze40636132018-05-28 08:16:08 +00001404 __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001405 }
1406 }
1407#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001408#if KMP_USE_INLINED_TAS
1409 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1410 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1411 } else
1412#elif KMP_USE_INLINED_FUTEX
1413 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1414 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1415 } else
1416#endif
1417 {
1418 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1419 }
1420 } else {
1421 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1422 lck = ilk->lock;
1423 if (__kmp_env_consistency_check) {
1424 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1425 __kmp_map_hint_to_lock(hint));
1426 }
1427#if USE_ITT_BUILD
1428 __kmp_itt_critical_acquiring(lck);
1429#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001430#if OMPT_SUPPORT && OMPT_OPTIONAL
1431 if (ompt_enabled.enabled) {
1432 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1433 /* OMPT state update */
1434 prev_state = ti.state;
Joachim Protze40636132018-05-28 08:16:08 +00001435 ti.wait_id = (omp_wait_id_t)lck;
Joachim Protze82e94a52017-11-01 10:08:30 +00001436 ti.state = omp_state_wait_critical;
1437
1438 /* OMPT event callback */
1439 if (ompt_enabled.ompt_callback_mutex_acquire) {
1440 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1441 ompt_mutex_critical, (unsigned int)hint,
Joachim Protze40636132018-05-28 08:16:08 +00001442 __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001443 }
1444 }
1445#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001446 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1447 }
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001448 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001449
Jim Cownie5e8470a2013-09-27 10:38:44 +00001450#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001451 __kmp_itt_critical_acquired(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001452#endif /* USE_ITT_BUILD */
Joachim Protze82e94a52017-11-01 10:08:30 +00001453#if OMPT_SUPPORT && OMPT_OPTIONAL
1454 if (ompt_enabled.enabled) {
1455 /* OMPT state update */
1456 ti.state = prev_state;
1457 ti.wait_id = 0;
1458
1459 /* OMPT event callback */
1460 if (ompt_enabled.ompt_callback_mutex_acquired) {
1461 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
Joachim Protze40636132018-05-28 08:16:08 +00001462 ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001463 }
1464 }
1465#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001466
Jonathan Peyton30419822017-05-12 18:01:32 +00001467 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1468 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001469} // __kmpc_critical_with_hint
1470
1471#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001472
1473/*!
1474@ingroup WORK_SHARING
1475@param loc source location information.
1476@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001477@param crit identity of the critical section. This could be a pointer to a lock
1478associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001479
1480Leave a critical section, releasing any lock that was held during its execution.
1481*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001482void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1483 kmp_critical_name *crit) {
1484 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001485
Jonathan Peyton30419822017-05-12 18:01:32 +00001486 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001487
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001488#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00001489 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1490 lck = (kmp_user_lock_p)crit;
1491 KMP_ASSERT(lck != NULL);
1492 if (__kmp_env_consistency_check) {
1493 __kmp_pop_sync(global_tid, ct_critical, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001494 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001495#if USE_ITT_BUILD
1496 __kmp_itt_critical_releasing(lck);
1497#endif
1498#if KMP_USE_INLINED_TAS
1499 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1500 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1501 } else
1502#elif KMP_USE_INLINED_FUTEX
1503 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1504 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1505 } else
1506#endif
1507 {
1508 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1509 }
1510 } else {
1511 kmp_indirect_lock_t *ilk =
1512 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1513 KMP_ASSERT(ilk != NULL);
1514 lck = ilk->lock;
1515 if (__kmp_env_consistency_check) {
1516 __kmp_pop_sync(global_tid, ct_critical, loc);
1517 }
1518#if USE_ITT_BUILD
1519 __kmp_itt_critical_releasing(lck);
1520#endif
1521 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1522 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001523
1524#else // KMP_USE_DYNAMIC_LOCK
1525
Jonathan Peyton30419822017-05-12 18:01:32 +00001526 if ((__kmp_user_lock_kind == lk_tas) &&
1527 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1528 lck = (kmp_user_lock_p)crit;
1529 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001530#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001531 else if ((__kmp_user_lock_kind == lk_futex) &&
1532 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1533 lck = (kmp_user_lock_p)crit;
1534 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001535#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001536 else { // ticket, queuing or drdpa
1537 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1538 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001539
Jonathan Peyton30419822017-05-12 18:01:32 +00001540 KMP_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001541
Jonathan Peyton30419822017-05-12 18:01:32 +00001542 if (__kmp_env_consistency_check)
1543 __kmp_pop_sync(global_tid, ct_critical, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001544
1545#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001546 __kmp_itt_critical_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001547#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001548  // The value of 'crit' should be valid for use as the critical_id of the
1549  // critical section directive.
1550 __kmp_release_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001551
Joachim Protze82e94a52017-11-01 10:08:30 +00001552#endif // KMP_USE_DYNAMIC_LOCK
1553
1554#if OMPT_SUPPORT && OMPT_OPTIONAL
1555 /* OMPT release event triggers after lock is released; place here to trigger
1556 * for all #if branches */
1557 OMPT_STORE_RETURN_ADDRESS(global_tid);
1558 if (ompt_enabled.ompt_callback_mutex_released) {
1559 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
Joachim Protze40636132018-05-28 08:16:08 +00001560 ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001561 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001562#endif
1563
Jonathan Peyton30419822017-05-12 18:01:32 +00001564 KMP_POP_PARTITIONED_TIMER();
1565 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001566}
1567
1568/*!
1569@ingroup SYNCHRONIZATION
1570@param loc source location information
1571@param global_tid thread id.
1572@return one if the thread should execute the master block, zero otherwise
1573
Jonathan Peyton30419822017-05-12 18:01:32 +00001574Start execution of a combined barrier and master. The barrier is executed inside
1575this function.
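
A sketch of the expected pairing with @ref __kmpc_end_barrier_master:
@code
if (__kmpc_barrier_master(loc, gtid)) {
  // ... code executed only by the thread selected as master ...
  __kmpc_end_barrier_master(loc, gtid);
}
@endcode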
Jim Cownie5e8470a2013-09-27 10:38:44 +00001576*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001577kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1578 int status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001579
Jonathan Peyton30419822017-05-12 18:01:32 +00001580 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001581
Jonathan Peyton30419822017-05-12 18:01:32 +00001582 if (!TCR_4(__kmp_init_parallel))
1583 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001584
Jonathan Peyton30419822017-05-12 18:01:32 +00001585 if (__kmp_env_consistency_check)
1586 __kmp_check_barrier(global_tid, ct_barrier, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001587
Joachim Protze82e94a52017-11-01 10:08:30 +00001588#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00001589 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001590 if (ompt_enabled.enabled) {
1591 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00001592 if (ompt_frame->enter_frame == NULL)
1593 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001594 OMPT_STORE_RETURN_ADDRESS(global_tid);
1595 }
1596#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001597#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001598 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001599#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001600 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001601#if OMPT_SUPPORT && OMPT_OPTIONAL
1602 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00001603 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001604 }
1605#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001606
Jonathan Peyton30419822017-05-12 18:01:32 +00001607 return (status != 0) ? 0 : 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001608}
1609
1610/*!
1611@ingroup SYNCHRONIZATION
1612@param loc source location information
1613@param global_tid thread id.
1614
1615Complete the execution of a combined barrier and master. This function should
1616only be called at the completion of the <tt>master</tt> code. Other threads will
1617still be waiting at the barrier and this call releases them.
1618*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001619void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1620 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001621
Jonathan Peyton30419822017-05-12 18:01:32 +00001622 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001623}
1624
1625/*!
1626@ingroup SYNCHRONIZATION
1627@param loc source location information
1628@param global_tid thread id.
1629@return one if the thread should execute the master block, zero otherwise
1630
1631Start execution of a combined barrier and master(nowait) construct.
1632The barrier is executed inside this function.
1633There is no equivalent "end" function, since the construct has no closing barrier; any end-of-master bookkeeping is completed inside this call.
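
A sketch of the expected use (note that no end call follows the block):
@code
if (__kmpc_barrier_master_nowait(loc, gtid)) {
  // ... code executed only by the master thread ...
}
@endcode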
1634*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001635kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1636 kmp_int32 ret;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001637
Jonathan Peyton30419822017-05-12 18:01:32 +00001638 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639
Jonathan Peyton30419822017-05-12 18:01:32 +00001640 if (!TCR_4(__kmp_init_parallel))
1641 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001642
Jonathan Peyton30419822017-05-12 18:01:32 +00001643 if (__kmp_env_consistency_check) {
1644 if (loc == 0) {
1645 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
Jim Cownie5e8470a2013-09-27 10:38:44 +00001646 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001647 __kmp_check_barrier(global_tid, ct_barrier, loc);
1648 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001649
Joachim Protze82e94a52017-11-01 10:08:30 +00001650#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00001651 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001652 if (ompt_enabled.enabled) {
1653 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00001654 if (ompt_frame->enter_frame == NULL)
1655 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001656 OMPT_STORE_RETURN_ADDRESS(global_tid);
1657 }
1658#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001659#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001660 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001661#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001662 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001663#if OMPT_SUPPORT && OMPT_OPTIONAL
1664 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00001665 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001666 }
1667#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001668
Jonathan Peyton30419822017-05-12 18:01:32 +00001669 ret = __kmpc_master(loc, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670
Jonathan Peyton30419822017-05-12 18:01:32 +00001671 if (__kmp_env_consistency_check) {
1672    /* there is no call to __kmpc_end_master, so the (stats) */
1673    /* actions of __kmpc_end_master are done here */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001674
Jonathan Peyton30419822017-05-12 18:01:32 +00001675 if (global_tid < 0) {
1676 KMP_WARNING(ThreadIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001677 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001678 if (ret) {
1679 /* only one thread should do the pop since only */
1680 /* one did the push (see __kmpc_master()) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001681
Jonathan Peyton30419822017-05-12 18:01:32 +00001682 __kmp_pop_sync(global_tid, ct_master, loc);
1683 }
1684 }
1685
1686 return (ret);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001687}
1688
1689/* The BARRIER for a SINGLE process section is always explicit */
1690/*!
1691@ingroup WORK_SHARING
1692@param loc source location information
1693@param global_tid global thread number
1694@return One if this thread should execute the single construct, zero otherwise.
1695
1696Test whether to execute a <tt>single</tt> construct.
Jonathan Peyton30419822017-05-12 18:01:32 +00001697There are no implicit barriers in the two "single" calls; rather, the compiler
1698should introduce an explicit barrier if it is required.
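
A sketch of the expected lowering of '#pragma omp single'; the trailing
barrier is emitted by the compiler only when no 'nowait' clause is present:
@code
if (__kmpc_single(loc, gtid)) {
  // ... code executed by the single winning thread ...
  __kmpc_end_single(loc, gtid);
}
__kmpc_barrier(loc, gtid); // omitted for 'single nowait'
@endcode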
Jim Cownie5e8470a2013-09-27 10:38:44 +00001699*/
1700
Jonathan Peyton30419822017-05-12 18:01:32 +00001701kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1702 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
Jonathan Peyton30138252016-03-03 21:21:05 +00001703
Jonathan Peyton30419822017-05-12 18:01:32 +00001704 if (rc) {
1705 // We are going to execute the single statement, so we should count it.
1706 KMP_COUNT_BLOCK(OMP_SINGLE);
1707 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1708 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001709
Joachim Protze82e94a52017-11-01 10:08:30 +00001710#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001711 kmp_info_t *this_thr = __kmp_threads[global_tid];
1712 kmp_team_t *team = this_thr->th.th_team;
1713 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001714
Joachim Protze82e94a52017-11-01 10:08:30 +00001715 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001716 if (rc) {
Joachim Protze82e94a52017-11-01 10:08:30 +00001717 if (ompt_enabled.ompt_callback_work) {
1718 ompt_callbacks.ompt_callback(ompt_callback_work)(
1719 ompt_work_single_executor, ompt_scope_begin,
1720 &(team->t.ompt_team_info.parallel_data),
1721 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1722 1, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001723 }
1724 } else {
Joachim Protze82e94a52017-11-01 10:08:30 +00001725 if (ompt_enabled.ompt_callback_work) {
1726 ompt_callbacks.ompt_callback(ompt_callback_work)(
1727 ompt_work_single_other, ompt_scope_begin,
1728 &(team->t.ompt_team_info.parallel_data),
1729 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1730 1, OMPT_GET_RETURN_ADDRESS(0));
1731 ompt_callbacks.ompt_callback(ompt_callback_work)(
1732 ompt_work_single_other, ompt_scope_end,
1733 &(team->t.ompt_team_info.parallel_data),
1734 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1735 1, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001736 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001737 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001738 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001739#endif
1740
Jonathan Peyton30419822017-05-12 18:01:32 +00001741 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001742}
1743
1744/*!
1745@ingroup WORK_SHARING
1746@param loc source location information
1747@param global_tid global thread number
1748
1749Mark the end of a <tt>single</tt> construct. This function should
1750only be called by the thread that executed the block of code protected
1751by the `single` construct.
1752*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001753void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1754 __kmp_exit_single(global_tid);
1755 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001756
Joachim Protze82e94a52017-11-01 10:08:30 +00001757#if OMPT_SUPPORT && OMPT_OPTIONAL
Jonathan Peyton30419822017-05-12 18:01:32 +00001758 kmp_info_t *this_thr = __kmp_threads[global_tid];
1759 kmp_team_t *team = this_thr->th.th_team;
1760 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001761
Joachim Protze82e94a52017-11-01 10:08:30 +00001762 if (ompt_enabled.ompt_callback_work) {
1763 ompt_callbacks.ompt_callback(ompt_callback_work)(
1764 ompt_work_single_executor, ompt_scope_end,
1765 &(team->t.ompt_team_info.parallel_data),
1766 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1767 OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001768 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001769#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001770}
1771
1772/*!
1773@ingroup WORK_SHARING
1774@param loc Source location
1775@param global_tid Global thread id
1776
1777Mark the end of a statically scheduled loop.
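
Each call must match a preceding __kmpc_for_static_init_* call issued by the
same thread. A sketch for a 32-bit signed induction variable, where 'n' is
the trip count and the increment and chunk arguments are left at 1:
@code
kmp_int32 last, lower = 0, upper = n - 1, stride = 1;
__kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lower, &upper,
                         &stride, 1, 1);
for (kmp_int32 i = lower; i <= upper; ++i) {
  // ... loop body ...
}
__kmpc_for_static_fini(loc, gtid);
@endcode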
1778*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001779void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001780 KMP_POP_PARTITIONED_TIMER();
Jonathan Peyton30419822017-05-12 18:01:32 +00001781 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001782
Joachim Protze82e94a52017-11-01 10:08:30 +00001783#if OMPT_SUPPORT && OMPT_OPTIONAL
1784 if (ompt_enabled.ompt_callback_work) {
Joachim Protze489cdb72018-09-10 14:34:54 +00001785 ompt_work_t ompt_work_type = ompt_work_loop;
Jonathan Peyton30419822017-05-12 18:01:32 +00001786 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001787 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1788 // Determine workshare type
1789 if (loc != NULL) {
1790 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1791 ompt_work_type = ompt_work_loop;
1792 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1793 ompt_work_type = ompt_work_sections;
1794 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1795 ompt_work_type = ompt_work_distribute;
1796 } else {
Joachim Protze91732472017-11-10 21:07:01 +00001797 // use default set above.
1798 // a warning about this case is provided in __kmpc_for_static_init
Joachim Protze82e94a52017-11-01 10:08:30 +00001799 }
1800 KMP_DEBUG_ASSERT(ompt_work_type);
1801 }
1802 ompt_callbacks.ompt_callback(ompt_callback_work)(
1803 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1804 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
Jonathan Peyton30419822017-05-12 18:01:32 +00001805 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001806#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001807 if (__kmp_env_consistency_check)
1808 __kmp_pop_workshare(global_tid, ct_pdo, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001809}
1810
Jonathan Peyton30419822017-05-12 18:01:32 +00001811// User routines that take C-style arguments (call by value),
1812// unlike the equivalent Fortran routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00001813
Jonathan Peyton30419822017-05-12 18:01:32 +00001814void ompc_set_num_threads(int arg) {
1815 // !!!!! TODO: check the per-task binding
1816 __kmp_set_num_threads(arg, __kmp_entry_gtid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00001817}
1818
Jonathan Peyton30419822017-05-12 18:01:32 +00001819void ompc_set_dynamic(int flag) {
1820 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001821
Jonathan Peyton30419822017-05-12 18:01:32 +00001822 /* For the thread-private implementation of the internal controls */
1823 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001824
Jonathan Peyton30419822017-05-12 18:01:32 +00001825 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001826
Jonathan Peyton30419822017-05-12 18:01:32 +00001827 set__dynamic(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001828}
1829
Jonathan Peyton30419822017-05-12 18:01:32 +00001830void ompc_set_nested(int flag) {
1831 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001832
Jonathan Peyton30419822017-05-12 18:01:32 +00001833 /* For the thread-private internal controls implementation */
1834 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001835
Jonathan Peyton30419822017-05-12 18:01:32 +00001836 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001837
Jonathan Peyton30419822017-05-12 18:01:32 +00001838 set__nested(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001839}
1840
Jonathan Peyton30419822017-05-12 18:01:32 +00001841void ompc_set_max_active_levels(int max_active_levels) {
1842 /* TO DO */
1843 /* we want per-task implementation of this internal control */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001844
Jonathan Peyton30419822017-05-12 18:01:32 +00001845 /* For the per-thread internal controls implementation */
1846 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001847}
1848
Jonathan Peyton30419822017-05-12 18:01:32 +00001849void ompc_set_schedule(omp_sched_t kind, int modifier) {
1850 // !!!!! TODO: check the per-task binding
1851 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001852}
1853
Jonathan Peyton30419822017-05-12 18:01:32 +00001854int ompc_get_ancestor_thread_num(int level) {
1855 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001856}
1857
Jonathan Peyton30419822017-05-12 18:01:32 +00001858int ompc_get_team_size(int level) {
1859 return __kmp_get_team_size(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001860}
1861
Jonathan Peyton30419822017-05-12 18:01:32 +00001862void kmpc_set_stacksize(int arg) {
1863 // __kmp_aux_set_stacksize initializes the library if needed
1864 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001865}
1866
Jonathan Peyton30419822017-05-12 18:01:32 +00001867void kmpc_set_stacksize_s(size_t arg) {
1868 // __kmp_aux_set_stacksize initializes the library if needed
1869 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001870}
1871
Jonathan Peyton30419822017-05-12 18:01:32 +00001872void kmpc_set_blocktime(int arg) {
1873 int gtid, tid;
1874 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001875
Jonathan Peyton30419822017-05-12 18:01:32 +00001876 gtid = __kmp_entry_gtid();
1877 tid = __kmp_tid_from_gtid(gtid);
1878 thread = __kmp_thread_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001879
Jonathan Peyton30419822017-05-12 18:01:32 +00001880 __kmp_aux_set_blocktime(arg, thread, tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001881}
1882
Jonathan Peyton30419822017-05-12 18:01:32 +00001883void kmpc_set_library(int arg) {
1884 // __kmp_user_set_library initializes the library if needed
1885 __kmp_user_set_library((enum library_type)arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001886}
1887
Jonathan Peyton30419822017-05-12 18:01:32 +00001888void kmpc_set_defaults(char const *str) {
1889 // __kmp_aux_set_defaults initializes the library if needed
1890 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001891}
1892
Jonathan Peyton30419822017-05-12 18:01:32 +00001893void kmpc_set_disp_num_buffers(int arg) {
1894 // ignore after initialization because some teams have already
1895 // allocated dispatch buffers
1896 if (__kmp_init_serial == 0 && arg > 0)
1897 __kmp_dispatch_num_buffers = arg;
Jonathan Peyton067325f2016-05-31 19:01:15 +00001898}
1899
Jonathan Peyton30419822017-05-12 18:01:32 +00001900int kmpc_set_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001901#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001902 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001903#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001904 if (!TCR_4(__kmp_init_middle)) {
1905 __kmp_middle_initialize();
1906 }
1907 return __kmp_aux_set_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001908#endif
1909}
1910
Jonathan Peyton30419822017-05-12 18:01:32 +00001911int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001912#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001913 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001914#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001915 if (!TCR_4(__kmp_init_middle)) {
1916 __kmp_middle_initialize();
1917 }
1918 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001919#endif
1920}
1921
Jonathan Peyton30419822017-05-12 18:01:32 +00001922int kmpc_get_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001923#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001924 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001925#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001926 if (!TCR_4(__kmp_init_middle)) {
1927 __kmp_middle_initialize();
1928 }
1929 return __kmp_aux_get_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001930#endif
1931}
1932
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933/* -------------------------------------------------------------------------- */
1934/*!
1935@ingroup THREADPRIVATE
1936@param loc source location information
1937@param gtid global thread number
1938@param cpy_size size of the cpy_data buffer
1939@param cpy_data pointer to data to be copied
1940@param cpy_func helper function to call for copying data
1941@param didit flag variable: 1=single thread; 0=not single thread
1942
Jonathan Peyton30419822017-05-12 18:01:32 +00001943__kmpc_copyprivate implements the interface for the private data broadcast
1944needed for the copyprivate clause associated with a single region in an
1945OpenMP<sup>*</sup> program (both C and Fortran).
Jim Cownie5e8470a2013-09-27 10:38:44 +00001946All threads participating in the parallel region call this routine.
Jonathan Peyton30419822017-05-12 18:01:32 +00001947One of the threads (called the single thread) should have the <tt>didit</tt>
1948variable set to 1 and all other threads should have that variable set to 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949All threads pass a pointer to a data buffer (cpy_data) that they have built.
1950
Jonathan Peyton30419822017-05-12 18:01:32 +00001951The OpenMP specification forbids the use of nowait on the single region when a
1952copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
1953barrier internally to avoid race conditions, so the code generation for the
1954single region should avoid generating a barrier after the call to @ref
1955__kmpc_copyprivate.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001956
1957The <tt>gtid</tt> parameter is the global thread id for the current thread.
1958The <tt>loc</tt> parameter is a pointer to source location information.
1959
Jonathan Peyton30419822017-05-12 18:01:32 +00001960Internal implementation: The single thread will first copy its descriptor
1961address (cpy_data) to a team-private location, then the other threads will each
1962call the function pointed to by the parameter cpy_func, which carries out the
1963copy by copying the data using the cpy_data buffer.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964
Jonathan Peyton30419822017-05-12 18:01:32 +00001965The cpy_func routine used for the copy and the contents of the data area defined
1966by cpy_data and cpy_size may be built in any fashion that will allow the copy
1967to be done. For instance, the cpy_data buffer can hold the actual data to be
1968copied or it may hold a list of pointers to the data. The cpy_func routine must
1969interpret the cpy_data buffer appropriately.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001970
1971The interface to cpy_func is as follows:
1972@code
1973void cpy_func( void *destination, void *source )
1974@endcode
1975where void *destination is the cpy_data pointer for the thread being copied to
1976and void *source is the cpy_data pointer for the thread being copied from.
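
An illustrative lowering of 'single copyprivate(x)' for an int x (a sketch;
the helper name and the choice to pass &x directly are one possible scheme):
@code
static void copy_x(void *dst, void *src) {
  // dst: the receiving thread's x; src: the single thread's x
  *(int *)dst = *(int *)src;
}
// executed by every thread in the team:
int x;
kmp_int32 didit = __kmpc_single(loc, gtid);
if (didit) {
  x = 42; // value produced by the single thread
  __kmpc_end_single(loc, gtid);
}
__kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_x, didit);
@endcode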
1977*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001978void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
1979 void *cpy_data, void (*cpy_func)(void *, void *),
1980 kmp_int32 didit) {
1981 void **data_ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982
Jonathan Peyton30419822017-05-12 18:01:32 +00001983 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001984
Jonathan Peyton30419822017-05-12 18:01:32 +00001985 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001986
Jonathan Peyton30419822017-05-12 18:01:32 +00001987 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988
Jonathan Peyton30419822017-05-12 18:01:32 +00001989 if (__kmp_env_consistency_check) {
1990 if (loc == 0) {
1991 KMP_WARNING(ConstructIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001993 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994
Jonathan Peyton30419822017-05-12 18:01:32 +00001995 // ToDo: Optimize the following two barriers into some kind of split barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00001996
Jonathan Peyton30419822017-05-12 18:01:32 +00001997 if (didit)
1998 *data_ptr = cpy_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999
Joachim Protze82e94a52017-11-01 10:08:30 +00002000#if OMPT_SUPPORT
Joachim Protzec5836064b2018-05-28 08:14:58 +00002001 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00002002 if (ompt_enabled.enabled) {
2003 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
Joachim Protzec255ca72017-11-05 14:11:10 +00002004 if (ompt_frame->enter_frame == NULL)
2005 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00002006 OMPT_STORE_RETURN_ADDRESS(gtid);
2007 }
2008#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002009/* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002010#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002011 __kmp_threads[gtid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002012#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002013 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002014
Jonathan Peyton30419822017-05-12 18:01:32 +00002015 if (!didit)
2016 (*cpy_func)(cpy_data, *data_ptr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002017
Jonathan Peyton30419822017-05-12 18:01:32 +00002018// Consider next barrier a user-visible barrier for barrier region boundaries
2019// Nesting checks are already handled by the single construct checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002020
Joachim Protze82e94a52017-11-01 10:08:30 +00002021#if OMPT_SUPPORT
2022 if (ompt_enabled.enabled) {
2023 OMPT_STORE_RETURN_ADDRESS(gtid);
2024 }
2025#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002026#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002027 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2028// tasks can overwrite the location)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002029#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002030 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00002031#if OMPT_SUPPORT && OMPT_OPTIONAL
2032 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00002033 ompt_frame->enter_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00002034 }
2035#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036}
2037
2038/* -------------------------------------------------------------------------- */
2039
Jonathan Peyton30419822017-05-12 18:01:32 +00002040#define INIT_LOCK __kmp_init_user_lock_with_checks
2041#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2042#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2043#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2044#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2045#define ACQUIRE_NESTED_LOCK_TIMED \
2046 __kmp_acquire_nested_user_lock_with_checks_timed
2047#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2048#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2049#define TEST_LOCK __kmp_test_user_lock_with_checks
2050#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2051#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2052#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002053
Jonathan Peyton30419822017-05-12 18:01:32 +00002054// TODO: Make check abort messages use location info & pass it into
2055// with_checks routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002057#if KMP_USE_DYNAMIC_LOCK
2058
2059// internal lock initializer
Jonathan Peyton30419822017-05-12 18:01:32 +00002060static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2061 kmp_dyna_lockseq_t seq) {
2062 if (KMP_IS_D_LOCK(seq)) {
2063 KMP_INIT_D_LOCK(lock, seq);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002064#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002065 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002066#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002067 } else {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002068 KMP_INIT_I_LOCK(lock, seq);
2069#if USE_ITT_BUILD
2070 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2071 __kmp_itt_lock_creating(ilk->lock, loc);
2072#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002073 }
2074}
2075
2076// internal nest lock initializer
2077static __forceinline void
2078__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2079 kmp_dyna_lockseq_t seq) {
2080#if KMP_USE_TSX
2081 // Don't have nested lock implementation for speculative locks
2082 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2083 seq = __kmp_user_lock_seq;
2084#endif
2085 switch (seq) {
2086 case lockseq_tas:
2087 seq = lockseq_nested_tas;
2088 break;
2089#if KMP_USE_FUTEX
2090 case lockseq_futex:
2091 seq = lockseq_nested_futex;
2092 break;
2093#endif
2094 case lockseq_ticket:
2095 seq = lockseq_nested_ticket;
2096 break;
2097 case lockseq_queuing:
2098 seq = lockseq_nested_queuing;
2099 break;
2100 case lockseq_drdpa:
2101 seq = lockseq_nested_drdpa;
2102 break;
2103 default:
2104 seq = lockseq_nested_queuing;
2105 }
2106 KMP_INIT_I_LOCK(lock, seq);
2107#if USE_ITT_BUILD
2108 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2109 __kmp_itt_lock_creating(ilk->lock, loc);
2110#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002111}
2112
2113/* initialize the lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00002114void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2115 uintptr_t hint) {
2116 KMP_DEBUG_ASSERT(__kmp_init_serial);
2117 if (__kmp_env_consistency_check && user_lock == NULL) {
2118 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2119 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002120
Jonathan Peyton30419822017-05-12 18:01:32 +00002121 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Joachim Protze82e94a52017-11-01 10:08:30 +00002122
2123#if OMPT_SUPPORT && OMPT_OPTIONAL
2124  // This is the case if called from omp_init_lock_with_hint:
2125 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2126 if (!codeptr)
2127 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2128 if (ompt_enabled.ompt_callback_lock_init) {
2129 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2130 ompt_mutex_lock, (omp_lock_hint_t)hint,
Joachim Protze40636132018-05-28 08:16:08 +00002131 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002132 codeptr);
2133 }
2134#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002135}
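
// For illustration, a sketch of how this entry point is expected to be
// reached from user code (the exact 'loc'/'gtid' plumbing lives in the
// user-level wrapper layer and may differ):
//   omp_lock_t lck;
//   omp_init_lock_with_hint(&lck, omp_lock_hint_contended);
//   // ...which forwards here roughly as:
//   __kmpc_init_lock_with_hint(loc, gtid, (void **)&lck,
//                              omp_lock_hint_contended);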
2136
2137/* initialize the nested lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00002138void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2139 void **user_lock, uintptr_t hint) {
2140 KMP_DEBUG_ASSERT(__kmp_init_serial);
2141 if (__kmp_env_consistency_check && user_lock == NULL) {
2142 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2143 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002144
Jonathan Peyton30419822017-05-12 18:01:32 +00002145 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Joachim Protze82e94a52017-11-01 10:08:30 +00002146
2147#if OMPT_SUPPORT && OMPT_OPTIONAL
2148  // This is the case if called from omp_init_nest_lock_with_hint:
2149 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2150 if (!codeptr)
2151 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2152 if (ompt_enabled.ompt_callback_lock_init) {
2153 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2154 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
Joachim Protze40636132018-05-28 08:16:08 +00002155 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002156 codeptr);
2157 }
2158#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00002159}
2160
2161#endif // KMP_USE_DYNAMIC_LOCK
2162
Jim Cownie5e8470a2013-09-27 10:38:44 +00002163/* initialize the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002164void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002165#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00002166
2167 KMP_DEBUG_ASSERT(__kmp_init_serial);
2168 if (__kmp_env_consistency_check && user_lock == NULL) {
2169 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2170 }
2171 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002172
Joachim Protze82e94a52017-11-01 10:08:30 +00002173#if OMPT_SUPPORT && OMPT_OPTIONAL
2174  // This is the case if called from omp_init_lock:
2175 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2176 if (!codeptr)
2177 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2178 if (ompt_enabled.ompt_callback_lock_init) {
2179 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2180 ompt_mutex_lock, omp_lock_hint_none,
Joachim Protze40636132018-05-28 08:16:08 +00002181 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
Joachim Protze82e94a52017-11-01 10:08:30 +00002182 codeptr);
2183 }
2184#endif
2185
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002186#else // KMP_USE_DYNAMIC_LOCK
2187
Jonathan Peyton30419822017-05-12 18:01:32 +00002188 static char const *const func = "omp_init_lock";
2189 kmp_user_lock_p lck;
2190 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002191
Jonathan Peyton30419822017-05-12 18:01:32 +00002192 if (__kmp_env_consistency_check) {
2193 if (user_lock == NULL) {
2194 KMP_FATAL(LockIsUninitialized, func);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002195 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002196 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002197
Jonathan Peyton30419822017-05-12 18:01:32 +00002198 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002199
Jonathan Peyton30419822017-05-12 18:01:32 +00002200 if ((__kmp_user_lock_kind == lk_tas) &&
2201 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2202 lck = (kmp_user_lock_p)user_lock;
2203 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002204#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002205 else if ((__kmp_user_lock_kind == lk_futex) &&
2206 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2207 lck = (kmp_user_lock_p)user_lock;
2208 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002209#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002210 else {
2211 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2212 }
2213 INIT_LOCK(lck);
2214 __kmp_set_user_lock_location(lck, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002215
Joachim Protze82e94a52017-11-01 10:08:30 +00002216#if OMPT_SUPPORT && OMPT_OPTIONAL
2217  // This is the case if called from omp_init_lock:
2218 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2219 if (!codeptr)
2220 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2221 if (ompt_enabled.ompt_callback_lock_init) {
2222 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2223 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
Joachim Protze40636132018-05-28 08:16:08 +00002224 (omp_wait_id_t)user_lock, codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002225 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002226#endif
2227
Jim Cownie5e8470a2013-09-27 10:38:44 +00002228#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002229 __kmp_itt_lock_creating(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002230#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002231
2232#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002233} // __kmpc_init_lock
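
/* Illustrative sketch (not part of the runtime): user code normally reaches
   this entry point through the omp_init_lock() API shim, which supplies the
   gtid and source-location arguments. Assuming that forwarding:

     #include <omp.h>

     omp_lock_t lock;          // opaque user storage, passed in as user_lock
     omp_init_lock(&lock);     // forwards to __kmpc_init_lock(loc, gtid, ...)
     // ... use the lock ...
     omp_destroy_lock(&lock);  // forwards to __kmpc_destroy_lock(loc, gtid, ...)
*/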

/* initialize the nestable lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock
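
/* Illustrative sketch of nestable-lock semantics (user-level view; the
   forwarding from the omp_* API into these __kmpc_* entry points is assumed):

     #include <omp.h>

     omp_nest_lock_t nl;
     omp_init_nest_lock(&nl);   // reaches __kmpc_init_nest_lock
     omp_set_nest_lock(&nl);    // depth 1
     omp_set_nest_lock(&nl);    // depth 2: legal for the owning thread
     omp_unset_nest_lock(&nl);  // depth 1
     omp_unset_nest_lock(&nl);  // depth 0: lock released
     omp_destroy_nest_lock(&nl);
*/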

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock
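
/* Sketch of the dynamic-lock dispatch assumed by the destroy path above: under
   KMP_USE_DYNAMIC_LOCK the user's lock word either encodes a "direct" lock
   inline (nonzero tag) or refers to an "indirect" lock object resolved through
   the lock table, which is why the code branches on KMP_EXTRACT_D_TAG first:

     kmp_user_lock_p lck;
     if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
       // tag 0: indirect lock; fetch the real lock object from the table
       lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
     } else {
       // nonzero tag: the user-visible word itself is the lock
       lck = (kmp_user_lock_p)user_lock;
     }
*/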

/* destroy the nestable lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
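
/* Illustrative user-level view (a sketch; the omp_set_lock -> __kmpc_set_lock
   forwarding is assumed): a simple lock serializing a shared update.

     #include <omp.h>

     static long counter = 0;
     omp_lock_t l;
     omp_init_lock(&l);
     #pragma omp parallel
     {
       omp_set_lock(&l);     // lands in __kmpc_set_lock, may block
       ++counter;            // protected update
       omp_unset_lock(&l);   // lands in __kmpc_unset_lock
     }
     omp_destroy_lock(&l);
*/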

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
          codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
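
/* Sketch of the OMPT event mapping implemented above (the callback names are
   real; the depth bookkeeping shown is a simplification):

     omp_set_nest_lock(&nl);   // depth 0 -> 1: ompt_callback_mutex_acquired
     omp_set_nest_lock(&nl);   // depth 1 -> 2: ompt_callback_nest_lock(ompt_scope_begin)
     omp_unset_nest_lock(&nl); // depth 2 -> 1: ompt_callback_nest_lock(ompt_scope_end)
     omp_unset_nest_lock(&nl); // depth 1 -> 0: ompt_callback_mutex_released
*/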

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      // Note: lck is not yet set on this fast path, so report the
      // user-visible lock address.
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
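
/* Note on the "fast" release path above (an informal sketch, not a formal
   memory-model claim): for a test-and-set lock that fits in omp_lock_t,
   releasing reduces to storing 0 into the poll word, with KMP_MB() keeping
   that store ordered with respect to surrounding memory operations. The intent
   corresponds to a release store, e.g. in C11 terms (hypothetical rendering,
   not the runtime's actual primitive):

     #include <stdatomic.h>
     // poll_word stands in for lck->tas.lk.poll
     atomic_store_explicit(&poll_word, 0, memory_order_release);
*/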

/* release the nestable lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      // Note: lck is not yet set on this fast path, so report the
      // user-visible lock address.
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (omp_wait_id_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
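
/* Sketch of the nestable fast path above: only the outermost unset clears the
   poll word; inner unsets merely decrement the ownership depth, so other
   threads can acquire the lock only after the balancing (outermost) release:

     if (--(tl->lk.depth_locked) == 0) { // outermost unset?
       TCW_4(tl->lk.poll, 0);            // actually release the lock
     }
*/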

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (omp_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
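
/* Illustrative user-level view (a sketch; the omp_test_lock ->
   __kmpc_test_lock forwarding is assumed): the test form never blocks and
   reports whether the lock was taken.

     #include <omp.h>

     if (omp_test_lock(&l)) {  // nonzero iff the lock was acquired
       // ... protected work ...
       omp_unset_lock(&l);
     } else {
       // lock busy: do something else instead of waiting
     }
*/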

/* try to acquire the nestable lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
        codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
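
/* Sketch of the return-value convention above: for a nestable lock the test
   routine returns the new nesting depth on success and 0 on failure, which is
   why rc == 1 is reported to OMPT as the first acquisition and rc > 1 as a
   re-acquisition by the owning thread:

     int depth = omp_test_nest_lock(&nl); // backed by __kmpc_test_nest_lock
     if (depth == 1) {
       // first acquisition by this thread
     } else if (depth > 1) {
       // owner re-acquired; nesting depth grew
     } else {
       // depth == 0: another thread holds the lock
     }
*/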

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

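/* Sketch of how the two macros pair up across the reduce entry points (the
   entry-point names are real; the flow shown is simplified):

     // in __kmpc_reduce_nowait() / __kmpc_reduce():
     packed_reduction_method = __kmp_determine_reduction_method(...);
     __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

     // later, in the matching __kmpc_end_reduce*() call:
     packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
     // ... branch on the stashed method instead of re-deriving it
*/
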
// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
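
/* Sketch of the lazy-initialization idiom used above: the critical-section
   word starts as 0 and is claimed with a single compare-and-swap, so at most
   one thread installs the lock tag and every other thread simply observes the
   nonzero word afterwards. Reduced, hypothetical form (tag stands in for the
   value derived from __kmp_user_lock_seq):

     if (*lk == 0) {
       // only one CAS can succeed; losers fall through and reuse the
       // winner's initialization
       KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, tag);
     }
*/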

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block

#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check whether we are inside a teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
#endif
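
/* Sketch of the intended pairing of the two helpers above (the helpers are
   real; the call sites shown compress what the __kmpc_reduce* entry points
   below actually do):

     kmp_team_t *team;
     int task_state;
     int teams_swapped =
         __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
     // ... perform the reduction against the parent team ...
     if (teams_swapped)
       __kmp_restore_swapped_teams(th, team, task_state);
*/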

/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // packed_reduction_method value will be reused by __kmp_end_reduce*
  // function, so the value should be kept in a variable
  // the variable should be either a construct-specific or thread-specific
  // property, not a team specific property
  // (a thread can reach the next reduce block on the next construct, reduce
  // method may differ on the next construct)
  // an ident_t "loc" parameter could be used as a construct-specific property
  // (what if loc == 0?)
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
  // a thread-specific variable is better regarding two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes 'determine' and 'set' lines (no need to execute by one
  // thread, to avoid unnecessary extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop', but the atomic operation has not been executed yet; it will
    // be executed slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if master goes slow, other threads are blocked here waiting for the
// master to come and release them)
// AT: (it's not what a customer might expect specifying NOWAIT clause)
// AT: (specifying NOWAIT won't result in improvement of performance, it'll
// be confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other design)
// might go faster and be more in line with sense of NOWAIT
// AT: TO DO: do epcc test and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's
// used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary
    // frame/return addresses.
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

    // all other workers except master should do this pop here
3409 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3410 if (__kmp_env_consistency_check) {
3411 if (retval == 0) {
3412 __kmp_pop_sync(global_tid, ct_reduce, loc);
3413 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003414 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003415
3416 } else {
3417
3418 // should never reach this block
3419 KMP_ASSERT(0); // "unexpected method"
3420 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003421#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003422 if (teams_swapped) {
Jonas Hahnfelda4ca5252017-12-05 16:51:24 +00003423 __kmp_restore_swapped_teams(th, team, task_state);
Jonathan Peyton30419822017-05-12 18:01:32 +00003424 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003425#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003426 KA_TRACE(
3427 10,
3428 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3429 global_tid, packed_reduction_method, retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003430
Jonathan Peyton30419822017-05-12 18:01:32 +00003431 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003432}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

  } else if (packed_reduction_method == atomic_reduce_block) {

    // Neither the master nor the other workers should get here: the code
    // generator does not emit this call in case 2 (atomic reduce block).
    // It would actually be better to remove this else-if entirely; after
    // removal this value would be caught by the 'else' branch and assert.

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
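
/* Illustrative sketch (a hedged example, not part of the runtime): one way a
   compiler might drive the nowait reduce interface for
     #pragma omp for nowait reduction(+ : sum)
   The names sum, sum_priv, add_func and crit stand for compiler-generated
   entities; the dispatch follows the return-value contract of
   __kmpc_reduce_nowait (1: combine and finish, 2: atomic path, 0: nothing).

     switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(sum), &sum_priv,
                                  add_func, &crit)) {
     case 1: // this thread combines the partial value and closes the region
       sum += sum_priv;
       __kmpc_end_reduce_nowait(loc, gtid, &crit);
       break;
     case 2: // each thread updates sum atomically (e.g. via a __kmpc_atomic_*
             // entry point); no __kmpc_end_reduce_nowait() call is emitted
       break;
     default: // 0: remaining tree-reduction workers have nothing left to do
       break;
     }
*/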

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be a stand-alone directive.

  // do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of a false-positive race by the thread checker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// This barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT is not specified).
#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

    // All workers except the master should do this pop here
    // (none of the workers except the master will enter __kmpc_end_reduce()).
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // This barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT is not specified).

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size == 1, no synchronization is required (Intel platforms
// only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    omp_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
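
/* Illustrative sketch (a hedged example, not part of the runtime): the
   blocking variant differs from the nowait one in that __kmpc_end_reduce()
   is reached on the atomic path as well, so the terminating barrier above is
   executed. Placeholder names as in the nowait sketch:

     int ret = __kmpc_reduce(loc, gtid, 1, sizeof(sum), &sum_priv, add_func,
                             &crit);
     if (ret == 1) {
       sum += sum_priv; // master (or critical-section holder) combines
       __kmpc_end_reduce(loc, gtid, &crit);
     } else if (ret == 2) {
       // atomic update of sum with sum_priv, then still close the region
       __kmpc_end_reduce(loc, gtid, &crit);
     }
     // ret == 0: tree-reduction workers were already released by the barrier
*/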

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid

#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loop bounds.

Initialize doacross loop information.
The compiler is expected to send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) gives lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Also save the address of num_done in order to access it later without
  // knowing the buffer index.
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count.
  // Start with the range of dims[0], which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if the shared buffer is occupied by another loop (the one with
  // index idx - __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
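
/* Illustrative example of the bounds convention (a sketch, not runtime code):
   for the nest
     #pragma omp for collapse(2) ordered(2)
     for (i = 2; i < 9; i += 2)   // inclusive: lo = 2, up = 8, st = 2
       for (j = 10; j > 0; j--)   // inclusive: lo = 10, up = 1, st = -1
   the compiler would pass, assuming kmp_dim is laid out as {lo, up, st}:
     struct kmp_dim dims[2] = {{2, 8, 2}, {10, 1, -1}};
     __kmpc_doacross_init(loc, gtid, 2, dims);
*/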

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
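
/* Worked example of the flag indexing above (illustrative): the flags array
   keeps one bit per linearized iteration, 32 iterations per kmp_uint32 word.
   For iter_number == 70: shft = 70 % 32 = 6, 70 >> 5 = 2, and
   flag = 1 << 6 = 0x40, so the thread spins until bit 6 of
   th_doacross_flags[2] is set by the matching __kmpc_doacross_post() call. */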

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
4058
Jonathan Peyton30419822017-05-12 18:01:32 +00004059void __kmpc_doacross_fini(ident_t *loc, int gtid) {
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004060 kmp_int32 num_done;
Jonathan Peyton30419822017-05-12 18:01:32 +00004061 kmp_info_t *th = __kmp_threads[gtid];
4062 kmp_team_t *team = th->th.th_team;
4063 kmp_disp_t *pr_buf = th->th.th_dispatch;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004064
Jonathan Peyton30419822017-05-12 18:01:32 +00004065 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4066 if (team->t.t_serialized) {
4067 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4068 return; // nothing to do
4069 }
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004070 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004071 if (num_done == th->th.th_team_nproc) {
4072 // we are the last thread, need to free shared resources
4073 int idx = pr_buf->th_doacross_buf_idx - 1;
4074 dispatch_shared_info_t *sh_buf =
4075 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4076 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4077 (kmp_int64)&sh_buf->doacross_num_done);
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004078 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
Jonathan Peyton30419822017-05-12 18:01:32 +00004079 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004080 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
Jonathan Peyton30419822017-05-12 18:01:32 +00004081 sh_buf->doacross_flags = NULL;
4082 sh_buf->doacross_num_done = 0;
4083 sh_buf->doacross_buf_idx +=
4084 __kmp_dispatch_num_buffers; // free buffer for future re-use
4085 }
4086 // free private resources (need to keep buffer index forever)
Jonathan Peyton369d72d2018-07-30 17:48:33 +00004087 pr_buf->th_doacross_flags = NULL;
Jonathan Peyton30419822017-05-12 18:01:32 +00004088 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4089 pr_buf->th_doacross_info = NULL;
4090 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
Jonathan Peyton71909c52016-03-02 22:42:06 +00004091}
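
/* Illustrative end-to-end sketch (a hedged example, not part of the runtime)
   of how a compiler might lower a one-dimensional doacross loop onto the four
   entry points above; sink/src are placeholder vectors:

     #pragma omp for ordered(1)
     for (i = 1; i < n; i++) {
       #pragma omp ordered depend(sink : i - 1)
       ... consume the result of iteration i - 1 ...
       #pragma omp ordered depend(source)
       ... publish the result of iteration i ...
     }

   becomes, per thread, roughly:

     struct kmp_dim dim = {1, n - 1, 1}; // inclusive bounds, as above
     __kmpc_doacross_init(loc, gtid, 1, &dim);
     for (each iteration i assigned to this thread) {
       kmp_int64 sink = i - 1;
       __kmpc_doacross_wait(loc, gtid, &sink); // block until i-1 is posted
       ... loop body up to the depend(source) point ...
       kmp_int64 src = i;
       __kmpc_doacross_post(loc, gtid, &src); // mark iteration i complete
       ... rest of the loop body ...
     }
     __kmpc_doacross_fini(loc, gtid);
*/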
#endif

#if OMP_50_ENABLED
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}
#endif // OMP_50_ENABLED

// end of file //