/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This call is also optional, and even if made
 * it does nothing unless the `KMP_IGNORE_MPPEND` environment variable is set
 * to zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
  // root (it can cause library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
}
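
// Editor's note: a minimal sketch (not part of the runtime) of how
// compiler-generated code might bracket a program with these optional entry
// points; the ident_t initializer is illustrative only, real compilers emit
// their own source-location records:
//
//   static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;0;0;;"};
//   int main() {
//     __kmpc_begin(&loc, 0); // optional; the runtime self-initializes anyway
//     // ... OpenMP constructs ...
//     __kmpc_end(&loc); // honored only when KMP_IGNORE_MPPEND is set to 0
//     return 0;
//   }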

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}
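
// Editor's note: a sketch (not part of the runtime) of the distinction drawn
// above. Two foreign (non-OpenMP) threads receive distinct global thread
// indices, while omp_get_thread_num() reports 0 for each of them outside any
// parallel region; the pthread scaffolding is assumed for the example only:
//
//   static void *worker(void *) {
//     printf("gtid=%d omp_tid=%d\n", __kmpc_global_thread_num(NULL),
//            omp_get_thread_num()); // gtids differ, omp_tids are both 0
//     return NULL;
//   }
//   // main() would spawn two pthreads running worker().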

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  // Guard against a missing third semicolon before scanning the line number.
  if ((semi3 != NULL) && (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1)) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}
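
// Editor's note: a sketch (not part of the runtime) of how a compiler might
// lower `#pragma omp parallel num_threads(4)` using this entry point together
// with __kmpc_fork_call (defined below); the outlined routine name is
// hypothetical:
//
//   void outlined(kmp_int32 *gtid, kmp_int32 *btid) { /* parallel body */ }
//   void lowered_region(ident_t *loc) {
//     kmp_int32 gtid = __kmpc_global_thread_num(loc);
//     __kmpc_push_num_threads(loc, gtid, 4); // affects only the next fork
//     __kmpc_fork_call(loc, 0, (kmpc_micro)outlined);
//   }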

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe saving thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }
}
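
// Editor's note: a sketch (not part of the runtime) of the varargs contract
// described above. For a parallel region with two shared variables the
// compiler passes argc=2 plus two pointers, and the outlined microtask
// receives them after the two implicit id arguments; all names here are
// hypothetical:
//
//   void body(kmp_int32 *gtid, kmp_int32 *btid, int *a, double *b) {
//     *a += 1; // executed by every thread in the team
//   }
//   void lowered(ident_t *loc, int *a, double *b) {
//     __kmpc_fork_call(loc, 2, (kmpc_micro)body, a, b);
//   }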

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams was called; set the default number of
  // teams otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
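
// Editor's note: a sketch (not part of the runtime) of lowering
// `#pragma omp teams num_teams(2) thread_limit(8)`; names are hypothetical:
//
//   void teams_body(kmp_int32 *gtid, kmp_int32 *btid) { /* teams region */ }
//   void lowered_teams(ident_t *loc) {
//     kmp_int32 gtid = __kmpc_global_thread_num(loc);
//     __kmpc_push_num_teams(loc, gtid, 2, 8); // required by the clauses
//     __kmpc_fork_teams(loc, 0, (kmpc_micro)teams_body);
//   }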
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}
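
// Editor's note: a sketch (not part of the runtime) of the conditional-region
// pattern described above; the compiler tests the if-clause at run time and
// chooses between a real fork and the serialized path (body is a hypothetical
// outlined routine):
//
//   void lowered_if_region(ident_t *loc, int condition) {
//     kmp_int32 gtid = __kmpc_global_thread_num(loc);
//     if (condition) {
//       __kmpc_fork_call(loc, 0, (kmpc_micro)body);
//     } else {
//       __kmpc_serialized_parallel(loc, gtid);
//       kmp_int32 btid = 0;
//       body(&gtid, &btid); // the "team" is just the encountering thread
//       __kmpc_end_serialized_parallel(loc, gtid);
//     }
//   }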

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          __kmp_tid_from_gtid(global_tid));
    }

    // reset (clear) the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? omp_state_work_serial
                                           : omp_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since we use volatile instead in the library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is addressed as well (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is an SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here; move along.
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
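
// Editor's note: a sketch (not part of the runtime) of how the flush
// directive reaches this entry point, using the busy-wait pattern from the
// PPC64 comment above:
//
//   // while (!flag) { #pragma omp flush(flag) }   lowers roughly to:
//   while (!flag) {
//     __kmpc_flush(loc); // full memory fence
//   }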

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when the 'barrier' directive is present or at the
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif
}
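
// Editor's note: a sketch (not part of the runtime): `#pragma omp barrier`
// inside a parallel region lowers to a single call of this entry point, and
// every thread of the team must reach it:
//
//   kmp_int32 gtid = __kmpc_global_thread_num(loc);
//   __kmpc_barrier(loc, gtid);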

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
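
// Editor's note: a sketch (not part of the runtime) of lowering
// `#pragma omp master { body(); }`: the return value guards the block and the
// paired end call is made only by the thread that entered it:
//
//   if (__kmpc_master(loc, gtid)) {
//     body();
//     __kmpc_end_master(loc, gtid);
//   } // no implied barrier; see the comment above __kmpc_master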

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = omp_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = omp_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
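
// Editor's note: a sketch (not part of the runtime) of an ordered block in a
// loop compiled with `#pragma omp for ordered`; the surrounding loop codegen
// is elided:
//
//   __kmpc_ordered(loc, gtid);
//   // ordered body: iterations execute this in sequence
//   __kmpc_end_ordered(loc, gtid);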

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    if (l->lk.poll != KMP_LOCK_FREE(tas) ||                                    \
        !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas),        \
                                     KMP_LOCK_BUSY(gtid + 1, tas))) {          \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (l->lk.poll != KMP_LOCK_FREE(tas) ||                               \
             !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas),   \
                                          KMP_LOCK_BUSY(gtid + 1, tas))) {     \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    rc = l->lk.poll == KMP_LOCK_FREE(tas) &&                                   \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas),        \
                                     KMP_LOCK_BUSY(gtid + 1, tas));            \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas));              \
    KMP_MB();                                                                  \
  }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed with no usage, but that is not
// a problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}
1097
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001098#endif // KMP_USE_DYNAMIC_LOCK
1099
Jim Cownie5e8470a2013-09-27 10:38:44 +00001100/*!
1101@ingroup WORK_SHARING
1102@param loc source location information.
1103@param global_tid global thread number .
Jonathan Peyton30419822017-05-12 18:01:32 +00001104@param crit identity of the critical section. This could be a pointer to a lock
1105associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001106
1107Enter code protected by a `critical` construct.
1108This function blocks until the executing thread can enter the critical section.
1109*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  KMP_TIME_PARTITIONED_BLOCK(
      OMP_critical_wait); /* Time spent waiting to enter the critical section */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  omp_state_t prev_state = omp_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // Since the critical directive binds to all threads, not just the current
  // team, we have to check this even if we are in a serialized team.
  // Also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)lck;
    ti.state = omp_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
    }
  }
#endif

  KMP_START_EXPLICIT_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}
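
/* Illustrative sketch (not upstream code): the sequence a compiler might emit
   for "#pragma omp critical (name)". The variable names and the loc/gtid
   arguments are assumptions for illustration only.

     static kmp_critical_name crit_name; // static storage, zero-initialized
     ...
     __kmpc_critical(loc, gtid, &crit_name);
     // ... user code protected by the critical construct ...
     __kmpc_end_critical(loc, gtid, &crit_name);
*/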

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
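
/* Worked examples of the mapping above, assuming KMP_USE_TSX is enabled and
   the CPU reports RTM support:
     __kmp_map_hint_to_lock(omp_lock_hint_contended)    -> lockseq_queuing
     __kmp_map_hint_to_lock(omp_lock_hint_uncontended)  -> lockseq_tas
     __kmp_map_hint_to_lock(omp_lock_hint_speculative)  -> lockseq_hle
     __kmp_map_hint_to_lock(omp_lock_hint_contended |
                            omp_lock_hint_uncontended)  -> __kmp_user_lock_seq
   Conflicting hints fall back to the default lock sequence. */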

#if OMPT_SUPPORT && OMPT_OPTIONAL
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return ompt_mutex_impl_unknown;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return ompt_mutex_impl_unknown;
  }
}

// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return ompt_mutex_impl_unknown;
  }
}
#endif

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uintptr_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  omp_state_t prev_state = omp_state_undefined;
  ompt_thread_info_t ti;
  // This is the case if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = omp_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = omp_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint
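
/* Illustrative sketch (not upstream code): lowering of a critical construct
   with an OpenMP 4.5 hint clause. Names are assumptions; note that the first
   caller tags the lock stored in crit_name, which is why the specification
   requires all same-named critical constructs to use the same hint.

     static kmp_critical_name crit_name; // zero until first use
     ...
     __kmpc_critical_with_hint(loc, gtid, &crit_name, omp_lock_hint_speculative);
     // ... protected user code ...
     __kmpc_end_critical(loc, gtid, &crit_name);
*/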

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed
inside this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
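
/* Illustrative sketch (names are assumptions): how the pair above is used.
   __kmpc_barrier_master executes the barrier itself and returns nonzero only
   on the thread that should run the master block:

     if (__kmpc_barrier_master(loc, gtid)) {
       // ... master-only code ...
       __kmpc_end_barrier_master(loc, gtid); // releases the waiting threads
     }
*/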

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the construct ends once the
master block completes; the bookkeeping normally done by __kmpc_end_master
is performed inside this call instead.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there is no __kmpc_end_master called; so the (stats) actions of
       __kmpc_end_master are done here */
    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only one did the push
         (see __kmpc_master()) */
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}
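
/* Illustrative sketch for the nowait variant (names are assumptions): the
   barrier releases all threads before the master block runs, so no "end"
   call is needed and the master thread simply falls through:

     if (__kmpc_barrier_master_nowait(loc, gtid)) {
       // ... master-only code ...
     }
     // all threads continue here without further synchronization
*/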

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
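
/* Illustrative sketch (names are assumptions): lowering of "#pragma omp
   single" without a nowait clause. Since neither call contains a barrier,
   the compiler emits the construct's implicit barrier explicitly:

     if (__kmpc_single(loc, gtid)) {
       // ... code executed by exactly one thread ...
       __kmpc_end_single(loc, gtid);
     }
     __kmpc_barrier(loc, gtid); // the implicit barrier of the construct
*/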

/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_type_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use the default set above;
        // a warning about this case is provided in __kmpc_for_static_init
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}
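
/* Illustrative sketch (bounds and names are assumptions): a statically
   scheduled loop brackets its body with an init call and this fini call:

     kmp_int32 last, lb = 0, ub = n - 1, st = 1;
     __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lb, &ub,
                              &st, 1, 1);
     for (kmp_int32 i = lb; i <= ub; i += st) {
       // ... loop body ...
     }
     __kmpc_for_static_fini(loc, gtid);
*/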

// User routines which take C-style arguments (call by value)
// different from the Fortran equivalent routines

void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__nested(thread, flag ? TRUE : FALSE);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* TO DO: we want a per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}

void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
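
/* Illustrative sketch (assumption: the kmp_* affinity API exported from
   omp.h, which these entry points back). Pinning the calling thread to
   logical processor 0 might look like this, error handling omitted:

     kmp_affinity_mask_t mask;
     kmp_create_affinity_mask(&mask);
     kmp_set_affinity_mask_proc(0, &mask);
     kmp_set_affinity(&mask);
     kmp_destroy_affinity_mask(&mask);
*/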

/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame == NULL)
      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
                                          // tasks can overwrite the location)
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = NULL;
  }
#endif
}
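
/* Illustrative sketch (helper and variable names are assumptions): lowering
   of "#pragma omp single copyprivate(x)" for a scalar int x. Here cpy_data is
   simply &x, the helper copies one int, and didit comes from __kmpc_single:

     static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
     ...
     int x;
     kmp_int32 didit = __kmpc_single(loc, gtid);
     if (didit) {
       x = ...; // produced by the single thread only
       __kmpc_end_single(loc, gtid);
     }
     __kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_x, didit);
     // no extra barrier needed: __kmpc_copyprivate synchronizes internally
*/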

/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}

/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
}

/* initialize the nest lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
}
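
/* Illustrative sketch (names are assumptions): user code reaches these entry
   points through the OpenMP 4.5 hint API, e.g.:

     omp_lock_t lock;
     omp_init_lock_with_hint(&lock, omp_lock_hint_speculative);
     omp_set_lock(&lock); // may use a TSX-based lock if the hardware allows
     omp_unset_lock(&lock);
     omp_destroy_lock(&lock);
*/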

#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
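
/* Illustrative sketch (loc and gtid are assumptions): the lifecycle that a
   compiler, or the omp_* lock wrappers, drive through these entry points:

     void *l = NULL; // omp_lock_t-sized storage, zero-initialized
     __kmpc_init_lock(loc, gtid, &l);
     __kmpc_set_lock(loc, gtid, &l);
     // ... protected region ...
     __kmpc_unset_lock(loc, gtid, &l);
     __kmpc_destroy_lock(loc, gtid, &l);
*/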

/* initialize the nest lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_nest_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
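
/* A sketch of the dynamic-lock dispatch used above (an assumption-level
   simplification, not the runtime's real tables): the tag extracted from the
   lock word indexes a table of set functions, with an optional inlined
   test-and-set fast path. All example_* names are hypothetical. */
#if 0
#include <stdatomic.h>

typedef void (*example_set_fn)(void *lock, int gtid);

static void example_tas_set(void *lock, int gtid) {
  atomic_int *poll = (atomic_int *)lock;
  int expected = 0;
  // spin until the compare-exchange installs gtid + 1 as the owner
  while (!atomic_compare_exchange_weak(poll, &expected, gtid + 1))
    expected = 0; // CAS failure overwrote 'expected'; reset and retry
}

static example_set_fn example_direct_set[] = {example_tas_set /*, ... */};

static void example_set_lock(void *lock, int tag, int gtid) {
  example_direct_set[tag](lock, gtid); // mirrors __kmp_direct_set[tag](...)
}
#endif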

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
          codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
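
/* A sketch of the acquire-status convention consumed by the OMPT code above
   (simplified owner/depth fields, not the runtime's real lock layout): a
   first acquisition reports mutex_acquired, while a re-acquisition by the
   current owner reports the nest_lock scope_begin event instead. */
#if 0
enum { EXAMPLE_ACQUIRED_FIRST = 1, EXAMPLE_ACQUIRED_NEXT = 0 };

typedef struct {
  volatile int owner; // gtid + 1 while held, 0 when free
  int depth;          // nesting depth held by the owner
} example_nest_lock_t;

static int example_acquire_nested(example_nest_lock_t *l, int gtid) {
  if (l->owner == gtid + 1) {
    l->depth++; // owner re-enters without waiting
    return EXAMPLE_ACQUIRED_NEXT;
  }
  while (!__sync_bool_compare_and_swap(&l->owner, 0, gtid + 1))
    ; // spin until the lock becomes free
  l->depth = 1;
  return EXAMPLE_ACQUIRED_FIRST; // analogous to KMP_LOCK_ACQUIRED_FIRST
}
#endif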

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* release the lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
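
/* A usage sketch (excluded from compilation): omp_test_lock, which lowers to
   __kmpc_test_lock above, never blocks and returns nonzero on success, so
   callers can overlap other work with the acquisition attempts. */
#if 0
#include <omp.h>

static void example_poll(omp_lock_t *l) {
  while (!omp_test_lock(l)) {
    // lock is busy: do useful work here instead of blocking
  }
  // ... critical work while holding the lock ...
  omp_unset_lock(l);
}
#endif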

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
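
/* A flow sketch (excluded from compilation) of how the two macros cooperate:
   __kmpc_reduce*() stores the chosen method in the calling thread's
   thread-local state, and __kmpc_end_reduce*() reads it back on that same
   thread. The function below is illustrative only. */
#if 0
static void example_reduce_method_flow(kmp_int32 gtid) {
  PACKED_REDUCTION_METHOD_T m = critical_reduce_block; // e.g. chosen method
  __KMP_SET_REDUCTION_METHOD(gtid, m); // done in __kmpc_reduce*()
  // ... later, in __kmpc_end_reduce*() on the same thread ...
  m = __KMP_GET_REDUCTION_METHOD(gtid); // retrieves the stored method
}
#endif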

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block

#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct?
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
#endif

/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause can not be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  // packed_reduction_method value will be reused by __kmp_end_reduce*
  // functions, so the value should be kept in a variable
  // the variable should be either a construct-specific or thread-specific
  // property, not a team-specific property
  // (a thread can reach the next reduce block on the next construct, and the
  // reduce method may differ on the next construct)
  // an ident_t "loc" parameter could be used as a construct-specific property
  // (what if loc == 0?)
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
  // a thread-specific variable is better regarding the two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes the 'determine' and 'set' lines (no need to execute
  // them on one thread only, which would require extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop', but the atomic operation has not been executed yet; it will
    // be executed slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // AT: performance issue: a real barrier here
    // AT: (if master goes slow, other threads are blocked here waiting for the
    // master to come and release them)
    // AT: (it's not what a customer might expect specifying NOWAIT clause)
    // AT: (specifying NOWAIT won't result in improvement of performance, it'll
    // be confusing to a customer)
    // AT: another implementation of *barrier_gather*nowait() (or some other
    // design) might go faster and be more in line with the sense of NOWAIT
    // AT: TO DO: do epcc test and compare times

    // this barrier should be invisible to a customer and to the threading
    // profile tool (it's neither a terminating barrier nor customer's code,
    // it's used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary
    // frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

    // all other workers except master should do this pop here
    // ( none of other workers will get to __kmpc_end_reduce_nowait() )
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
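
/* A sketch (excluded from compilation) of the code a compiler typically
   generates around __kmpc_reduce_nowait for "reduction(+:x) nowait"; the
   variable and helper names are illustrative. The return value selects the
   finalization path described in the @result documentation above. */
#if 0
static void example_reduce_func(void *lhs_data, void *rhs_data) {
  // each argument points to an array of pointers to the private copies
  int *lhs = ((int **)lhs_data)[0];
  int *rhs = ((int **)rhs_data)[0];
  *lhs += *rhs;
}

static void example_codegen(ident_t *loc, kmp_int32 gtid,
                            kmp_critical_name *crit, int *priv, int *shared) {
  void *rdata[1] = {priv};
  switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(rdata), rdata,
                               example_reduce_func, crit)) {
  case 1: // critical path: every thread adds its copy; tree path: master only
    *shared += *priv;
    __kmpc_end_reduce_nowait(loc, gtid, crit);
    break;
  case 2: // atomic path: each thread updates the shared copy atomically
    __sync_fetch_and_add(shared, *priv);
    break;
  default: // 0: this thread's contribution was combined during the barrier
    break;
  }
}
#endif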
3408
3409/*!
3410@ingroup SYNCHRONIZATION
3411@param loc source location information
3412@param global_tid global thread id.
3413@param lck pointer to the unique lock data structure
3414
3415Finish the execution of a reduce nowait.
3416*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it's better to remove this else-if entirely; after removal
    // this value will be checked by the 'else' branch and will assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
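
/* Illustrative sketch (not part of the runtime): the calling pattern a
 * compiler typically emits for a nowait reduction such as
 *   #pragma omp for nowait reduction(+ : sum)
 * dispatching on the result values documented for the start routine.
 * All identifiers other than the __kmpc_* entry points (loc, gtid, sum,
 * priv_sum, reduce_cb, crit) are hypothetical placeholders, not generated
 * names.
 *
 *   switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(sum), &priv_sum,
 *                                reduce_cb, &crit)) {
 *   case 1: // critical path, or master on the tree path: combine directly
 *     sum += priv_sum;
 *     __kmpc_end_reduce_nowait(&loc, gtid, &crit);
 *     break;
 *   case 2: // atomic path: update atomically; no end call is generated
 *     __atomic_fetch_add(&sum, priv_sum, __ATOMIC_RELAXED);
 *     break;
 *   case 0: // tree-path workers: reduction already combined, nothing to do
 *     break;
 *   }
 */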

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers except master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size==1, no synchronization is required (Intel platforms only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame == NULL)
        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = NULL;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
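
/* Illustrative sketch (not part of the runtime): a plausible calling pattern
 * for the blocking variant used when NOWAIT is absent, e.g.
 *   #pragma omp for reduction(+ : sum)
 * Unlike the nowait form, __kmpc_end_reduce() is also reached on the atomic
 * path, since it supplies the construct's terminating barrier. Identifiers
 * other than the __kmpc_* entry points are hypothetical placeholders.
 *
 *   switch (__kmpc_reduce(&loc, gtid, 1, sizeof(sum), &priv_sum, reduce_cb,
 *                         &crit)) {
 *   case 1: // critical path, or master on the tree path
 *     sum += priv_sum;
 *     __kmpc_end_reduce(&loc, gtid, &crit);
 *     break;
 *   case 2: // atomic path: still calls the end routine for the barrier
 *     __atomic_fetch_add(&sum, priv_sum, __ATOMIC_RELAXED);
 *     __kmpc_end_reduce(&loc, gtid, &crit);
 *     break;
 *   case 0: // tree-path workers: released later by the master's end call
 *     break;
 *   }
 */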

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
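
/* Illustrative sketch (not part of the runtime): both entry points return 0
 * when called from a thread the runtime does not know about, so code linked
 * against libomp can use them for lightweight task tracing. The declarations
 * below are assumptions for the example, not a public header.
 *
 *   extern "C" kmp_uint64 __kmpc_get_taskid();
 *   extern "C" kmp_uint64 __kmpc_get_parent_taskid();
 *   ...
 *   printf("task %llu (parent %llu)\n",
 *          (unsigned long long)__kmpc_get_taskid(),
 *          (unsigned long long)__kmpc_get_parent_taskid());
 */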

#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loop bounds.

Initialize doacross loop information.
The compiler is expected to send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by another loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
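
/* Illustrative sketch (not part of the runtime): the calling pattern a
 * compiler might emit for a one-dimensional doacross loop such as
 *   #pragma omp for ordered(1)
 *   for (i = 2; i < 9; i += 2) { ... }
 * with `ordered depend(sink: i-2)` before and `ordered depend(source)` after
 * the dependent work. The chunk bounds my_lb/my_ub and the kmp_dim field
 * order in the initializer are assumptions; lo/up/st are the inclusive
 * bounds described above.
 *
 *   struct kmp_dim dim = {2, 8, 2}; // lo=2, up=8, st=2
 *   __kmpc_doacross_init(&loc, gtid, 1, &dim);
 *   for (long long i = my_lb; i <= my_ub; i += 2) {
 *     long long sink = i - 2;
 *     __kmpc_doacross_wait(&loc, gtid, &sink); // depend(sink: i-2)
 *     ... dependent work ...
 *     long long src = i;
 *     __kmpc_doacross_post(&loc, gtid, &src); // depend(source)
 *   }
 *   __kmpc_doacross_fini(&loc, gtid);
 */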

void __kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
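
/* Worked example of the flag indexing above (illustrative numbers): for a
 * 2-D nest where the second dimension has range ln = 10, the iteration at
 * offsets (3, 7) from the lower bounds linearizes to
 *   iter_number = 7 + 10 * 3 = 37,
 * which lands in word 37 >> 5 = 1 of th_doacross_flags with bit mask
 * 1 << (37 % 32) = 0x20. __kmpc_doacross_post() sets exactly the same bit,
 * so each cross-iteration dependence costs one bit test per spin here and
 * at most one atomic OR on the posting side. */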

void __kmpc_doacross_post(ident_t *loc, int gtid, long long *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
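
/* Design note (added commentary): the plain-load test before
 * KMP_TEST_THEN_OR32 above skips the atomic read-modify-write entirely when
 * the flag bit is already set, trading one cheap read for fewer contended
 * atomic operations on the shared flag words. */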
4033
Jonathan Peyton30419822017-05-12 18:01:32 +00004034void __kmpc_doacross_fini(ident_t *loc, int gtid) {
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004035 kmp_int32 num_done;
Jonathan Peyton30419822017-05-12 18:01:32 +00004036 kmp_info_t *th = __kmp_threads[gtid];
4037 kmp_team_t *team = th->th.th_team;
4038 kmp_disp_t *pr_buf = th->th.th_dispatch;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004039
Jonathan Peyton30419822017-05-12 18:01:32 +00004040 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4041 if (team->t.t_serialized) {
4042 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4043 return; // nothing to do
4044 }
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004045 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004046 if (num_done == th->th.th_team_nproc) {
4047 // we are the last thread, need to free shared resources
4048 int idx = pr_buf->th_doacross_buf_idx - 1;
4049 dispatch_shared_info_t *sh_buf =
4050 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4051 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4052 (kmp_int64)&sh_buf->doacross_num_done);
Jonas Hahnfeld3ffca792018-01-07 16:54:36 +00004053 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
Jonathan Peyton30419822017-05-12 18:01:32 +00004054 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004055 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
Jonathan Peyton30419822017-05-12 18:01:32 +00004056 sh_buf->doacross_flags = NULL;
4057 sh_buf->doacross_num_done = 0;
4058 sh_buf->doacross_buf_idx +=
4059 __kmp_dispatch_num_buffers; // free buffer for future re-use
4060 }
4061 // free private resources (need to keep buffer index forever)
4062 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4063 pr_buf->th_doacross_info = NULL;
4064 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
Jonathan Peyton71909c52016-03-02 22:42:06 +00004065}
4066#endif
4067
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004068#if OMP_50_ENABLED
Andrey Churbanov2d91a8a2018-03-22 18:51:51 +00004069int __kmpc_get_target_offload(void) {
4070 if (!__kmp_init_serial) {
4071 __kmp_serial_initialize();
4072 }
4073 return __kmp_target_offload;
4074}
Jonathan Peyton78f977f2018-03-20 21:18:17 +00004075#endif // OMP_50_ENABLED
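
/* Illustrative note (an assumption about intended use, not runtime code):
 * the returned value reflects the OpenMP 5.0 OMP_TARGET_OFFLOAD policy
 * (disabled / default / mandatory), so an offloading component can ask the
 * host runtime whether falling back to host execution is permitted:
 *
 *   int policy = __kmpc_get_target_offload();
 *   // compare against the runtime's target-offload constants (names vary
 *   // by version, so they are not spelled out here)
 */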

// end of file //