/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
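
This is the same value that omp_get_thread_num() reports for the calling
thread; for illustration (a sketch, assuming loc is a compiler-emitted ident_t):
@code
kmp_int32 tid = __kmpc_bound_thread_num(&loc);
// tid == omp_get_thread_num() when called from the same OpenMP thread
@endcode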
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
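  // loc->psource is assumed to be a semicolon-separated string of the form
  // ";file;routine;first_line;..." (see the psource field of ident_t); the
  // code below walks those fields to apply the __kmp_par_range filter.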
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
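
For illustration (a sketch; outlined_body stands in for the compiler's real
outlined routine), a construct like
@code
#pragma omp parallel num_threads(4)
@endcode
is lowered to something of the form
@code
__kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);
@endcode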
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
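
As a sketch (the outlined function and its name are illustrative, not actual
compiler output), a region such as
@code
int x = 0;
#pragma omp parallel shared(x)
  do_work(&x);
@endcode
becomes an outlined microtask plus a fork:
@code
void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *x) { do_work(x); }
// ...
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x);
@endcode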
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
#if OMPT_SUPPORT
                    VOLATILE_CAST(void *) microtask, // "unwrapped" task
#endif
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
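
For example (a sketch; teams_outlined is a placeholder for the compiler's
outlined routine), a directive like
@code
#pragma omp teams num_teams(2) thread_limit(8)
@endcode
is lowered roughly as
@code
__kmpc_push_num_teams(&loc, gtid, 2, 8);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined);
@endcode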
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.reenter_runtime_frame =
        __builtin_frame_address(1);
  }
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
#if OMPT_SUPPORT
                  VOLATILE_CAST(void *) microtask, // "unwrapped" task
#endif
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
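
In that case the compiler may emit, roughly (a sketch; outlined and zero are
illustrative),
@code
if (condition) {
  __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
} else {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  kmp_int32 zero = 0;
  __kmpc_serialized_parallel(&loc, gtid);
  outlined(&gtid, &zero);
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode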
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
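
A bare <tt>flush</tt> directive is lowered to a single call of this function
(sketch):
@code
__kmpc_flush(&loc);
@endcode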
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }; // if
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }; // if
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }; // if

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT && OMPT_TRACE
  ompt_frame_t *ompt_frame;
  if (ompt_enabled) {
    ompt_frame = __ompt_get_task_frame_internal(0);
    if (ompt_frame->reenter_runtime_frame == NULL)
      ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  //   this function is called when 'barrier' directive is present or
  //   implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled) {
    ompt_frame->reenter_runtime_frame = NULL;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
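
The expected usage pattern is (a sketch):
@code
if (__kmpc_master(&loc, gtid)) {
  // ... code executed by the master thread only ...
  __kmpc_end_master(&loc, gtid);
}
@endcode
No barrier is implied; as noted above, any barrier for a master section must
be emitted explicitly.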
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_TRACE
  if (status) {
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_event_master_begin)(
          team->t.ompt_team_info.parallel_id,
          team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_end)) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_event_master_end)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
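
Inside a chunk of an ordered loop the calls pair up as follows (a sketch):
@code
__kmpc_ordered(&loc, gtid);
// ... body that must execute in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode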
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = (uint64_t)loc;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
      ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
          th->th.ompt_thread_info.wait_id);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
      ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
          th->th.ompt_thread_info.wait_id);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_BLAME
  if (ompt_enabled &&
      ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
    ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
        th->th.ompt_thread_info.wait_id);
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    if (l->lk.poll != KMP_LOCK_FREE(tas) ||                                    \
        !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas),        \
                                     KMP_LOCK_BUSY(gtid + 1, tas))) {          \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (l->lk.poll != KMP_LOCK_FREE(tas) ||                               \
             !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas),   \
                                          KMP_LOCK_BUSY(gtid + 1, tas))) {     \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    rc = l->lk.poll == KMP_LOCK_FREE(tas) &&                                   \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas),        \
                                     KMP_LOCK_BUSY(gtid + 1, tas));            \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas));              \
    KMP_MB();                                                                  \
  }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed with no usage, but that is not
// a problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be reused
// for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
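
For a directive such as
@code
#pragma omp critical (name)
@endcode
the compiler typically emits a shared, zero-initialized lock word and brackets
the protected code with calls of this form (a sketch):
@code
static kmp_critical_name crit_name = {0};
// ...
__kmpc_critical(&loc, gtid, &crit_name);
// ... protected code ...
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode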
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  KMP_TIME_PARTITIONED_BLOCK(
      OMP_critical_wait); /* Time spent waiting to enter the critical section */
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

// Since the critical directive binds to all threads, not just the current
// team, we have to check this even if we are in a serialized team.
// Also, even if we are the uber thread, we still have to acquire the lock,
// as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */

  KMP_START_EXPLICIT_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
1177*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001178void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1179 kmp_critical_name *crit, uintptr_t hint) {
1180 KMP_COUNT_BLOCK(OMP_CRITICAL);
1181 kmp_user_lock_p lck;
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001182
Jonathan Peyton30419822017-05-12 18:01:32 +00001183 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001184
Jonathan Peyton30419822017-05-12 18:01:32 +00001185 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1186 // Check if it is initialized.
1187 if (*lk == 0) {
1188 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1189 if (KMP_IS_D_LOCK(lckseq)) {
1190 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1191 KMP_GET_D_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001192 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001193 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001194 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001195 }
1196 // Branch for accessing the actual lock object and set operation. This
1197 // branching is inevitable since this lock initialization does not follow the
1198 // normal dispatch path (lock table is not used).
1199 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1200 lck = (kmp_user_lock_p)lk;
1201 if (__kmp_env_consistency_check) {
1202 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1203 __kmp_map_hint_to_lock(hint));
1204 }
1205#if USE_ITT_BUILD
1206 __kmp_itt_critical_acquiring(lck);
1207#endif
1208#if KMP_USE_INLINED_TAS
1209 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1210 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1211 } else
1212#elif KMP_USE_INLINED_FUTEX
1213 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1214 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1215 } else
1216#endif
1217 {
1218 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1219 }
1220 } else {
1221 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1222 lck = ilk->lock;
1223 if (__kmp_env_consistency_check) {
1224 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1225 __kmp_map_hint_to_lock(hint));
1226 }
1227#if USE_ITT_BUILD
1228 __kmp_itt_critical_acquiring(lck);
1229#endif
1230 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1231 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001232
Jim Cownie5e8470a2013-09-27 10:38:44 +00001233#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001234 __kmp_itt_critical_acquired(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001235#endif /* USE_ITT_BUILD */
1236
Jonathan Peyton30419822017-05-12 18:01:32 +00001237 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1238 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001239} // __kmpc_critical_with_hint
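
/* Illustrative variant for '#pragma omp critical (name) hint(...)' (sketch;
   same hypothetical names as in the plain example above), differing from the
   plain form only in the entry call:

     static kmp_critical_name name_crit;
     __kmpc_critical_with_hint(&loc, global_tid, &name_crit,
                               omp_lock_hint_speculative);
     // ... protected user code ...
     __kmpc_end_critical(&loc, global_tid, &name_crit);
*/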
1240
1241#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001242
1243/*!
1244@ingroup WORK_SHARING
1245@param loc source location information.
@param global_tid global thread number.
Jonathan Peyton30419822017-05-12 18:01:32 +00001247@param crit identity of the critical section. This could be a pointer to a lock
1248associated with the critical section, or some other suitably unique value.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001249
1250Leave a critical section, releasing any lock that was held during its execution.
1251*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001252void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1253 kmp_critical_name *crit) {
1254 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001255
Jonathan Peyton30419822017-05-12 18:01:32 +00001256 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001257
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001258#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00001259 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1260 lck = (kmp_user_lock_p)crit;
1261 KMP_ASSERT(lck != NULL);
1262 if (__kmp_env_consistency_check) {
1263 __kmp_pop_sync(global_tid, ct_critical, loc);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001264 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001265#if USE_ITT_BUILD
1266 __kmp_itt_critical_releasing(lck);
1267#endif
1268#if KMP_USE_INLINED_TAS
1269 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1270 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1271 } else
1272#elif KMP_USE_INLINED_FUTEX
1273 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1274 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1275 } else
1276#endif
1277 {
1278 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1279 }
1280 } else {
1281 kmp_indirect_lock_t *ilk =
1282 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1283 KMP_ASSERT(ilk != NULL);
1284 lck = ilk->lock;
1285 if (__kmp_env_consistency_check) {
1286 __kmp_pop_sync(global_tid, ct_critical, loc);
1287 }
1288#if USE_ITT_BUILD
1289 __kmp_itt_critical_releasing(lck);
1290#endif
1291 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1292 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001293
1294#else // KMP_USE_DYNAMIC_LOCK
1295
Jonathan Peyton30419822017-05-12 18:01:32 +00001296 if ((__kmp_user_lock_kind == lk_tas) &&
1297 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1298 lck = (kmp_user_lock_p)crit;
1299 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001300#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001301 else if ((__kmp_user_lock_kind == lk_futex) &&
1302 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1303 lck = (kmp_user_lock_p)crit;
1304 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001305#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001306 else { // ticket, queuing or drdpa
1307 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1308 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001309
Jonathan Peyton30419822017-05-12 18:01:32 +00001310 KMP_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001311
Jonathan Peyton30419822017-05-12 18:01:32 +00001312 if (__kmp_env_consistency_check)
1313 __kmp_pop_sync(global_tid, ct_critical, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001314
1315#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001316 __kmp_itt_critical_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001317#endif /* USE_ITT_BUILD */
  // The value of 'crit' is suitable for use as the critical_id of this
  // critical section directive.
1320 __kmp_release_user_lock_with_checks(lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001321
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001322#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peyton30419822017-05-12 18:01:32 +00001323 if (ompt_enabled &&
1324 ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
1325 ompt_callbacks.ompt_callback(ompt_event_release_critical)((uint64_t)lck);
1326 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001327#endif
1328
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001329#endif // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00001330 KMP_POP_PARTITIONED_TIMER();
1331 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001332}
1333
1334/*!
1335@ingroup SYNCHRONIZATION
1336@param loc source location information
1337@param global_tid thread id.
1338@return one if the thread should execute the master block, zero otherwise
1339
Jonathan Peyton30419822017-05-12 18:01:32 +00001340Start execution of a combined barrier and master. The barrier is executed inside
1341this function.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001342*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001343kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1344 int status;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001345
Jonathan Peyton30419822017-05-12 18:01:32 +00001346 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001347
Jonathan Peyton30419822017-05-12 18:01:32 +00001348 if (!TCR_4(__kmp_init_parallel))
1349 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001350
Jonathan Peyton30419822017-05-12 18:01:32 +00001351 if (__kmp_env_consistency_check)
1352 __kmp_check_barrier(global_tid, ct_barrier, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001353
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001354#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001355 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001356#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001357 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001358
Jonathan Peyton30419822017-05-12 18:01:32 +00001359 return (status != 0) ? 0 : 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001360}
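
/* Typical shape of the code a compiler might emit around this call (sketch;
   'loc' and 'gtid' are hypothetical):

     if (__kmpc_barrier_master(&loc, gtid)) {
       // ... code executed only by the selected (master) thread ...
       __kmpc_end_barrier_master(&loc, gtid);
     }
*/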
1361
1362/*!
1363@ingroup SYNCHRONIZATION
1364@param loc source location information
1365@param global_tid thread id.
1366
1367Complete the execution of a combined barrier and master. This function should
1368only be called at the completion of the <tt>master</tt> code. Other threads will
1369still be waiting at the barrier and this call releases them.
1370*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001371void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1372 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001373
Jonathan Peyton30419822017-05-12 18:01:32 +00001374 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001375}
1376
1377/*!
1378@ingroup SYNCHRONIZATION
1379@param loc source location information
1380@param global_tid thread id.
1381@return one if the thread should execute the master block, zero otherwise
1382
1383Start execution of a combined barrier and master(nowait) construct.
1384The barrier is executed inside this function.
There is no equivalent "end" function, since the cleanup an "end" call would
perform (e.g. popping the master construct for the consistency checks) is
handled inside this function.
1386*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001387kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1388 kmp_int32 ret;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001389
Jonathan Peyton30419822017-05-12 18:01:32 +00001390 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001391
Jonathan Peyton30419822017-05-12 18:01:32 +00001392 if (!TCR_4(__kmp_init_parallel))
1393 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001394
Jonathan Peyton30419822017-05-12 18:01:32 +00001395 if (__kmp_env_consistency_check) {
1396 if (loc == 0) {
1397 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
Jim Cownie5e8470a2013-09-27 10:38:44 +00001398 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001399 __kmp_check_barrier(global_tid, ct_barrier, loc);
1400 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001401
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001402#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001403 __kmp_threads[global_tid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001404#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001405 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001406
Jonathan Peyton30419822017-05-12 18:01:32 +00001407 ret = __kmpc_master(loc, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001408
Jonathan Peyton30419822017-05-12 18:01:32 +00001409 if (__kmp_env_consistency_check) {
1410 /* there's no __kmpc_end_master called; so the (stats) */
1411 /* actions of __kmpc_end_master are done here */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001412
Jonathan Peyton30419822017-05-12 18:01:32 +00001413 if (global_tid < 0) {
1414 KMP_WARNING(ThreadIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001415 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001416 if (ret) {
1417 /* only one thread should do the pop since only */
1418 /* one did the push (see __kmpc_master()) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001419
Jonathan Peyton30419822017-05-12 18:01:32 +00001420 __kmp_pop_sync(global_tid, ct_master, loc);
1421 }
1422 }
1423
1424 return (ret);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001425}
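
/* Sketch of the nowait variant: there is no matching "end" call, and the
   other threads continue past the barrier without waiting for the master
   block to complete:

     if (__kmpc_barrier_master_nowait(&loc, gtid)) {
       // ... master-only code ...
     }
*/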
1426
1427/* The BARRIER for a SINGLE process section is always explicit */
1428/*!
1429@ingroup WORK_SHARING
1430@param loc source location information
1431@param global_tid global thread number
1432@return One if this thread should execute the single construct, zero otherwise.
1433
1434Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if one is required (see the usage sketch
after __kmpc_end_single below).
Jim Cownie5e8470a2013-09-27 10:38:44 +00001437*/
1438
Jonathan Peyton30419822017-05-12 18:01:32 +00001439kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1440 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
Jonathan Peyton30138252016-03-03 21:21:05 +00001441
Jonathan Peyton30419822017-05-12 18:01:32 +00001442 if (rc) {
1443 // We are going to execute the single statement, so we should count it.
1444 KMP_COUNT_BLOCK(OMP_SINGLE);
1445 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1446 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001447
1448#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001449 kmp_info_t *this_thr = __kmp_threads[global_tid];
1450 kmp_team_t *team = this_thr->th.th_team;
1451 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001452
Jonathan Peyton30419822017-05-12 18:01:32 +00001453 if (ompt_enabled) {
1454 if (rc) {
1455 if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
1456 ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
1457 team->t.ompt_team_info.parallel_id,
1458 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
1459 team->t.ompt_team_info.microtask);
1460 }
1461 } else {
1462 if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
1463 ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
1464 team->t.ompt_team_info.parallel_id,
1465 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1466 }
1467 this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001468 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001469 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001470#endif
1471
Jonathan Peyton30419822017-05-12 18:01:32 +00001472 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001473}
1474
1475/*!
1476@ingroup WORK_SHARING
1477@param loc source location information
1478@param global_tid global thread number
1479
1480Mark the end of a <tt>single</tt> construct. This function should
1481only be called by the thread that executed the block of code protected
1482by the `single` construct.
1483*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001484void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1485 __kmp_exit_single(global_tid);
1486 KMP_POP_PARTITIONED_TIMER();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001487
1488#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001489 kmp_info_t *this_thr = __kmp_threads[global_tid];
1490 kmp_team_t *team = this_thr->th.th_team;
1491 int tid = __kmp_tid_from_gtid(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001492
Jonathan Peyton30419822017-05-12 18:01:32 +00001493 if (ompt_enabled &&
1494 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
1495 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
1496 team->t.ompt_team_info.parallel_id,
1497 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1498 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001499#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001500}
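
/* Sketch of the expected codegen for '#pragma omp single' (hypothetical
   'loc'/'gtid'; the trailing barrier is the compiler's responsibility, as
   noted above, and is omitted for 'nowait'):

     if (__kmpc_single(&loc, gtid)) {
       // ... code executed by exactly one thread ...
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_barrier(&loc, gtid);
*/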
1501
1502/*!
1503@ingroup WORK_SHARING
1504@param loc Source location
1505@param global_tid Global thread id
1506
1507Mark the end of a statically scheduled loop.
1508*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001509void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1510 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001511
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001512#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001513 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
1514 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1515 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
1516 ompt_callbacks.ompt_callback(ompt_event_loop_end)(team_info->parallel_id,
1517 task_info->task_id);
1518 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001519#endif
1520
Jonathan Peyton30419822017-05-12 18:01:32 +00001521 if (__kmp_env_consistency_check)
1522 __kmp_pop_workshare(global_tid, ct_pdo, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001523}
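
/* Typical pairing with a static-init entry point (sketch; the arguments are
   abbreviated and the exact entry point depends on the induction variable
   width, e.g. __kmpc_for_static_init_4 for a 32-bit loop):

     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
                              &upper, &stride, incr, chunk);
     for (i = lower; i <= upper; i += incr) {
       // ... loop body ...
     }
     __kmpc_for_static_fini(&loc, gtid);
*/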
1524
Jonathan Peyton30419822017-05-12 18:01:32 +00001525// User routines which take C-style arguments (call by value)
1526// different from the Fortran equivalent routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00001527
Jonathan Peyton30419822017-05-12 18:01:32 +00001528void ompc_set_num_threads(int arg) {
1529 // !!!!! TODO: check the per-task binding
1530 __kmp_set_num_threads(arg, __kmp_entry_gtid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00001531}
1532
Jonathan Peyton30419822017-05-12 18:01:32 +00001533void ompc_set_dynamic(int flag) {
1534 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001535
Jonathan Peyton30419822017-05-12 18:01:32 +00001536 /* For the thread-private implementation of the internal controls */
1537 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001538
Jonathan Peyton30419822017-05-12 18:01:32 +00001539 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001540
Jonathan Peyton30419822017-05-12 18:01:32 +00001541 set__dynamic(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001542}
1543
Jonathan Peyton30419822017-05-12 18:01:32 +00001544void ompc_set_nested(int flag) {
1545 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001546
Jonathan Peyton30419822017-05-12 18:01:32 +00001547 /* For the thread-private internal controls implementation */
1548 thread = __kmp_entry_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001549
Jonathan Peyton30419822017-05-12 18:01:32 +00001550 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001551
Jonathan Peyton30419822017-05-12 18:01:32 +00001552 set__nested(thread, flag ? TRUE : FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001553}
1554
Jonathan Peyton30419822017-05-12 18:01:32 +00001555void ompc_set_max_active_levels(int max_active_levels) {
1556 /* TO DO */
1557 /* we want per-task implementation of this internal control */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001558
Jonathan Peyton30419822017-05-12 18:01:32 +00001559 /* For the per-thread internal controls implementation */
1560 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001561}
1562
Jonathan Peyton30419822017-05-12 18:01:32 +00001563void ompc_set_schedule(omp_sched_t kind, int modifier) {
1564 // !!!!! TODO: check the per-task binding
1565 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001566}
1567
Jonathan Peyton30419822017-05-12 18:01:32 +00001568int ompc_get_ancestor_thread_num(int level) {
1569 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001570}
1571
Jonathan Peyton30419822017-05-12 18:01:32 +00001572int ompc_get_team_size(int level) {
1573 return __kmp_get_team_size(__kmp_entry_gtid(), level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001574}
1575
Jonathan Peyton30419822017-05-12 18:01:32 +00001576void kmpc_set_stacksize(int arg) {
1577 // __kmp_aux_set_stacksize initializes the library if needed
1578 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001579}
1580
Jonathan Peyton30419822017-05-12 18:01:32 +00001581void kmpc_set_stacksize_s(size_t arg) {
1582 // __kmp_aux_set_stacksize initializes the library if needed
1583 __kmp_aux_set_stacksize(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001584}
1585
Jonathan Peyton30419822017-05-12 18:01:32 +00001586void kmpc_set_blocktime(int arg) {
1587 int gtid, tid;
1588 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001589
Jonathan Peyton30419822017-05-12 18:01:32 +00001590 gtid = __kmp_entry_gtid();
1591 tid = __kmp_tid_from_gtid(gtid);
1592 thread = __kmp_thread_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001593
Jonathan Peyton30419822017-05-12 18:01:32 +00001594 __kmp_aux_set_blocktime(arg, thread, tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001595}
1596
Jonathan Peyton30419822017-05-12 18:01:32 +00001597void kmpc_set_library(int arg) {
1598 // __kmp_user_set_library initializes the library if needed
1599 __kmp_user_set_library((enum library_type)arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001600}
1601
Jonathan Peyton30419822017-05-12 18:01:32 +00001602void kmpc_set_defaults(char const *str) {
1603 // __kmp_aux_set_defaults initializes the library if needed
1604 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001605}
1606
Jonathan Peyton30419822017-05-12 18:01:32 +00001607void kmpc_set_disp_num_buffers(int arg) {
1608 // ignore after initialization because some teams have already
1609 // allocated dispatch buffers
1610 if (__kmp_init_serial == 0 && arg > 0)
1611 __kmp_dispatch_num_buffers = arg;
Jonathan Peyton067325f2016-05-31 19:01:15 +00001612}
1613
Jonathan Peyton30419822017-05-12 18:01:32 +00001614int kmpc_set_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001615#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001616 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001617#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001618 if (!TCR_4(__kmp_init_middle)) {
1619 __kmp_middle_initialize();
1620 }
1621 return __kmp_aux_set_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001622#endif
1623}
1624
Jonathan Peyton30419822017-05-12 18:01:32 +00001625int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001626#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001627 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001628#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001629 if (!TCR_4(__kmp_init_middle)) {
1630 __kmp_middle_initialize();
1631 }
1632 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001633#endif
1634}
1635
Jonathan Peyton30419822017-05-12 18:01:32 +00001636int kmpc_get_affinity_mask_proc(int proc, void **mask) {
Alp Toker98758b02014-03-02 04:12:06 +00001637#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001638 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001640 if (!TCR_4(__kmp_init_middle)) {
1641 __kmp_middle_initialize();
1642 }
1643 return __kmp_aux_get_affinity_mask_proc(proc, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001644#endif
1645}
1646
Jim Cownie5e8470a2013-09-27 10:38:44 +00001647/* -------------------------------------------------------------------------- */
1648/*!
1649@ingroup THREADPRIVATE
1650@param loc source location information
1651@param gtid global thread number
1652@param cpy_size size of the cpy_data buffer
1653@param cpy_data pointer to data to be copied
1654@param cpy_func helper function to call for copying data
1655@param didit flag variable: 1=single thread; 0=not single thread
1656
Jonathan Peyton30419822017-05-12 18:01:32 +00001657__kmpc_copyprivate implements the interface for the private data broadcast
1658needed for the copyprivate clause associated with a single region in an
1659OpenMP<sup>*</sup> program (both C and Fortran).
Jim Cownie5e8470a2013-09-27 10:38:44 +00001660All threads participating in the parallel region call this routine.
Jonathan Peyton30419822017-05-12 18:01:32 +00001661One of the threads (called the single thread) should have the <tt>didit</tt>
1662variable set to 1 and all other threads should have that variable set to 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001663All threads pass a pointer to a data buffer (cpy_data) that they have built.
1664
Jonathan Peyton30419822017-05-12 18:01:32 +00001665The OpenMP specification forbids the use of nowait on the single region when a
1666copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
1667barrier internally to avoid race conditions, so the code generation for the
1668single region should avoid generating a barrier after the call to @ref
1669__kmpc_copyprivate.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670
1671The <tt>gtid</tt> parameter is the global thread id for the current thread.
1672The <tt>loc</tt> parameter is a pointer to source location information.
1673
Jonathan Peyton30419822017-05-12 18:01:32 +00001674Internal implementation: The single thread will first copy its descriptor
1675address (cpy_data) to a team-private location, then the other threads will each
1676call the function pointed to by the parameter cpy_func, which carries out the
1677copy by copying the data using the cpy_data buffer.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001678
Jonathan Peyton30419822017-05-12 18:01:32 +00001679The cpy_func routine used for the copy and the contents of the data area defined
1680by cpy_data and cpy_size may be built in any fashion that will allow the copy
1681to be done. For instance, the cpy_data buffer can hold the actual data to be
1682copied or it may hold a list of pointers to the data. The cpy_func routine must
1683interpret the cpy_data buffer appropriately.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001684
1685The interface to cpy_func is as follows:
1686@code
1687void cpy_func( void *destination, void *source )
1688@endcode
1689where void *destination is the cpy_data pointer for the thread being copied to
1690and void *source is the cpy_data pointer for the thread being copied from.
1691*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001692void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
1693 void *cpy_data, void (*cpy_func)(void *, void *),
1694 kmp_int32 didit) {
1695 void **data_ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001696
Jonathan Peyton30419822017-05-12 18:01:32 +00001697 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001698
Jonathan Peyton30419822017-05-12 18:01:32 +00001699 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001700
Jonathan Peyton30419822017-05-12 18:01:32 +00001701 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001702
Jonathan Peyton30419822017-05-12 18:01:32 +00001703 if (__kmp_env_consistency_check) {
1704 if (loc == 0) {
1705 KMP_WARNING(ConstructIdentInvalid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001706 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001707 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001708
Jonathan Peyton30419822017-05-12 18:01:32 +00001709 // ToDo: Optimize the following two barriers into some kind of split barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00001710
Jonathan Peyton30419822017-05-12 18:01:32 +00001711 if (didit)
1712 *data_ptr = cpy_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001713
Jonathan Peyton30419822017-05-12 18:01:32 +00001714/* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001715#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001716 __kmp_threads[gtid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001717#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001718 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001719
Jonathan Peyton30419822017-05-12 18:01:32 +00001720 if (!didit)
1721 (*cpy_func)(cpy_data, *data_ptr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001722
Jonathan Peyton30419822017-05-12 18:01:32 +00001723// Consider next barrier a user-visible barrier for barrier region boundaries
1724// Nesting checks are already handled by the single construct checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00001725
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001726#if USE_ITT_NOTIFY
  // TODO: check if this is needed (e.g. tasks can overwrite the location)
  __kmp_threads[gtid]->th.th_ident = loc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001729#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001730 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001731}
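
/* Illustrative cpy_func for a copyprivate clause covering a single 'int' (a
   hypothetical helper matching the interface above; a compiler generates an
   equivalent routine for the actual copyprivate list):

     static void copy_int(void *dst, void *src) {
       *(int *)dst = *(int *)src; // broadcast the value into this thread's copy
     }
*/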
1732
1733/* -------------------------------------------------------------------------- */
1734
Jonathan Peyton30419822017-05-12 18:01:32 +00001735#define INIT_LOCK __kmp_init_user_lock_with_checks
1736#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1737#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1738#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1739#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1740#define ACQUIRE_NESTED_LOCK_TIMED \
1741 __kmp_acquire_nested_user_lock_with_checks_timed
1742#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1743#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1744#define TEST_LOCK __kmp_test_user_lock_with_checks
1745#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1746#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1747#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00001748
Jonathan Peyton30419822017-05-12 18:01:32 +00001749// TODO: Make check abort messages use location info & pass it into
1750// with_checks routines
Jim Cownie5e8470a2013-09-27 10:38:44 +00001751
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001752#if KMP_USE_DYNAMIC_LOCK
1753
1754// internal lock initializer
Jonathan Peyton30419822017-05-12 18:01:32 +00001755static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
1756 kmp_dyna_lockseq_t seq) {
1757 if (KMP_IS_D_LOCK(seq)) {
1758 KMP_INIT_D_LOCK(lock, seq);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001759#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001760 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001761#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001762 } else {
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001763 KMP_INIT_I_LOCK(lock, seq);
1764#if USE_ITT_BUILD
1765 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1766 __kmp_itt_lock_creating(ilk->lock, loc);
1767#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001768 }
1769}
1770
1771// internal nest lock initializer
1772static __forceinline void
1773__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
1774 kmp_dyna_lockseq_t seq) {
1775#if KMP_USE_TSX
1776 // Don't have nested lock implementation for speculative locks
1777 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
1778 seq = __kmp_user_lock_seq;
1779#endif
1780 switch (seq) {
1781 case lockseq_tas:
1782 seq = lockseq_nested_tas;
1783 break;
1784#if KMP_USE_FUTEX
1785 case lockseq_futex:
1786 seq = lockseq_nested_futex;
1787 break;
1788#endif
1789 case lockseq_ticket:
1790 seq = lockseq_nested_ticket;
1791 break;
1792 case lockseq_queuing:
1793 seq = lockseq_nested_queuing;
1794 break;
1795 case lockseq_drdpa:
1796 seq = lockseq_nested_drdpa;
1797 break;
1798 default:
1799 seq = lockseq_nested_queuing;
1800 }
1801 KMP_INIT_I_LOCK(lock, seq);
1802#if USE_ITT_BUILD
1803 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1804 __kmp_itt_lock_creating(ilk->lock, loc);
1805#endif
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001806}
1807
1808/* initialize the lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00001809void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
1810 uintptr_t hint) {
1811 KMP_DEBUG_ASSERT(__kmp_init_serial);
1812 if (__kmp_env_consistency_check && user_lock == NULL) {
1813 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
1814 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001815
Jonathan Peyton30419822017-05-12 18:01:32 +00001816 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001817}
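
/* Sketch of a call as it might be reached from
   'omp_init_lock_with_hint(&lck, omp_lock_hint_contended)', where 'lck' is a
   hypothetical omp_lock_t whose address is passed as the lock word:

     omp_lock_t lck;
     __kmpc_init_lock_with_hint(&loc, gtid, (void **)&lck,
                                omp_lock_hint_contended);
*/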
1818
/* initialize the nested lock with a hint */
Jonathan Peyton30419822017-05-12 18:01:32 +00001820void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
1821 void **user_lock, uintptr_t hint) {
1822 KMP_DEBUG_ASSERT(__kmp_init_serial);
1823 if (__kmp_env_consistency_check && user_lock == NULL) {
1824 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
1825 }
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001826
Jonathan Peyton30419822017-05-12 18:01:32 +00001827 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
Jonathan Peytonb87b5812015-12-11 22:04:05 +00001828}
1829
1830#endif // KMP_USE_DYNAMIC_LOCK
1831
Jim Cownie5e8470a2013-09-27 10:38:44 +00001832/* initialize the lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00001833void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001834#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00001835
1836 KMP_DEBUG_ASSERT(__kmp_init_serial);
1837 if (__kmp_env_consistency_check && user_lock == NULL) {
1838 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1839 }
1840 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001841
1842#else // KMP_USE_DYNAMIC_LOCK
1843
Jonathan Peyton30419822017-05-12 18:01:32 +00001844 static char const *const func = "omp_init_lock";
1845 kmp_user_lock_p lck;
1846 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001847
Jonathan Peyton30419822017-05-12 18:01:32 +00001848 if (__kmp_env_consistency_check) {
1849 if (user_lock == NULL) {
1850 KMP_FATAL(LockIsUninitialized, func);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001851 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001852 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001853
Jonathan Peyton30419822017-05-12 18:01:32 +00001854 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001855
Jonathan Peyton30419822017-05-12 18:01:32 +00001856 if ((__kmp_user_lock_kind == lk_tas) &&
1857 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
1858 lck = (kmp_user_lock_p)user_lock;
1859 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001860#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001861 else if ((__kmp_user_lock_kind == lk_futex) &&
1862 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
1863 lck = (kmp_user_lock_p)user_lock;
1864 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001865#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001866 else {
1867 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
1868 }
1869 INIT_LOCK(lck);
1870 __kmp_set_user_lock_location(lck, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001871
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001872#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001873 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
1874 ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t)lck);
1875 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001876#endif
1877
Jim Cownie5e8470a2013-09-27 10:38:44 +00001878#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001879 __kmp_itt_lock_creating(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001880#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001881
1882#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001883} // __kmpc_init_lock
1884
/* initialize the nested lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00001886void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001887#if KMP_USE_DYNAMIC_LOCK
1888
Jonathan Peyton30419822017-05-12 18:01:32 +00001889 KMP_DEBUG_ASSERT(__kmp_init_serial);
1890 if (__kmp_env_consistency_check && user_lock == NULL) {
1891 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1892 }
1893 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001894
1895#else // KMP_USE_DYNAMIC_LOCK
1896
Jonathan Peyton30419822017-05-12 18:01:32 +00001897 static char const *const func = "omp_init_nest_lock";
1898 kmp_user_lock_p lck;
1899 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900
Jonathan Peyton30419822017-05-12 18:01:32 +00001901 if (__kmp_env_consistency_check) {
1902 if (user_lock == NULL) {
1903 KMP_FATAL(LockIsUninitialized, func);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001904 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001905 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001906
Jonathan Peyton30419822017-05-12 18:01:32 +00001907 KMP_CHECK_USER_LOCK_INIT();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001908
Jonathan Peyton30419822017-05-12 18:01:32 +00001909 if ((__kmp_user_lock_kind == lk_tas) &&
1910 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
1911 OMP_NEST_LOCK_T_SIZE)) {
1912 lck = (kmp_user_lock_p)user_lock;
1913 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001914#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001915 else if ((__kmp_user_lock_kind == lk_futex) &&
1916 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
1917 OMP_NEST_LOCK_T_SIZE)) {
1918 lck = (kmp_user_lock_p)user_lock;
1919 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001920#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001921 else {
1922 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
1923 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001924
Jonathan Peyton30419822017-05-12 18:01:32 +00001925 INIT_NESTED_LOCK(lck);
1926 __kmp_set_user_lock_location(lck, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001927
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001928#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001929 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
1930 ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t)lck);
1931 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001932#endif
1933
Jim Cownie5e8470a2013-09-27 10:38:44 +00001934#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001935 __kmp_itt_lock_creating(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001936#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001937
1938#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001939} // __kmpc_init_nest_lock
1940
Jonathan Peyton30419822017-05-12 18:01:32 +00001941void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001942#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943
Jonathan Peyton30419822017-05-12 18:01:32 +00001944#if USE_ITT_BUILD
1945 kmp_user_lock_p lck;
1946 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
1947 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
1948 } else {
1949 lck = (kmp_user_lock_p)user_lock;
1950 }
1951 __kmp_itt_lock_destroyed(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001952#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001953 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1954#else
1955 kmp_user_lock_p lck;
1956
1957 if ((__kmp_user_lock_kind == lk_tas) &&
1958 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
1959 lck = (kmp_user_lock_p)user_lock;
1960 }
1961#if KMP_USE_FUTEX
1962 else if ((__kmp_user_lock_kind == lk_futex) &&
1963 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
1964 lck = (kmp_user_lock_p)user_lock;
1965 }
1966#endif
1967 else {
1968 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
1969 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001970
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001971#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001972 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
1973 ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t)lck);
1974 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001975#endif
1976
Jim Cownie5e8470a2013-09-27 10:38:44 +00001977#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001978 __kmp_itt_lock_destroyed(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001979#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001980 DESTROY_LOCK(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001981
Jonathan Peyton30419822017-05-12 18:01:32 +00001982 if ((__kmp_user_lock_kind == lk_tas) &&
1983 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
1984 ;
1985 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00001986#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00001987 else if ((__kmp_user_lock_kind == lk_futex) &&
1988 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
1989 ;
1990 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001991#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001992 else {
1993 __kmp_user_lock_free(user_lock, gtid, lck);
1994 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001995#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001996} // __kmpc_destroy_lock
1997
/* destroy the nested lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00001999void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002000#if KMP_USE_DYNAMIC_LOCK
2001
Jonathan Peyton30419822017-05-12 18:01:32 +00002002#if USE_ITT_BUILD
2003 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2004 __kmp_itt_lock_destroyed(ilk->lock);
2005#endif
2006 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002007
2008#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002009
Jonathan Peyton30419822017-05-12 18:01:32 +00002010 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011
Jonathan Peyton30419822017-05-12 18:01:32 +00002012 if ((__kmp_user_lock_kind == lk_tas) &&
2013 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2014 OMP_NEST_LOCK_T_SIZE)) {
2015 lck = (kmp_user_lock_p)user_lock;
2016 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002017#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002018 else if ((__kmp_user_lock_kind == lk_futex) &&
2019 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2020 OMP_NEST_LOCK_T_SIZE)) {
2021 lck = (kmp_user_lock_p)user_lock;
2022 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002023#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002024 else {
2025 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2026 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002028#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00002029 if (ompt_enabled &&
2030 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
2031 ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t)lck);
2032 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002033#endif
2034
Jim Cownie5e8470a2013-09-27 10:38:44 +00002035#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002036 __kmp_itt_lock_destroyed(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002037#endif /* USE_ITT_BUILD */
2038
Jonathan Peyton30419822017-05-12 18:01:32 +00002039 DESTROY_NESTED_LOCK(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002040
Jonathan Peyton30419822017-05-12 18:01:32 +00002041 if ((__kmp_user_lock_kind == lk_tas) &&
2042 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2043 OMP_NEST_LOCK_T_SIZE)) {
2044 ;
2045 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002046#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002047 else if ((__kmp_user_lock_kind == lk_futex) &&
2048 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2049 OMP_NEST_LOCK_T_SIZE)) {
2050 ;
2051 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002052#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002053 else {
2054 __kmp_user_lock_free(user_lock, gtid, lck);
2055 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002056#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002057} // __kmpc_destroy_nest_lock
2058
Jonathan Peyton30419822017-05-12 18:01:32 +00002059void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2060 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002061#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00002062 int tag = KMP_EXTRACT_D_TAG(user_lock);
2063#if USE_ITT_BUILD
  // The itt routine resolves the right lock object from user_lock.
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2067#endif
2068#if KMP_USE_INLINED_TAS
2069 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2070 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2071 } else
2072#elif KMP_USE_INLINED_FUTEX
2073 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2074 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2075 } else
2076#endif
2077 {
2078 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2079 }
2080#if USE_ITT_BUILD
2081 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2082#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002083
2084#else // KMP_USE_DYNAMIC_LOCK
2085
Jonathan Peyton30419822017-05-12 18:01:32 +00002086 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002087
Jonathan Peyton30419822017-05-12 18:01:32 +00002088 if ((__kmp_user_lock_kind == lk_tas) &&
2089 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2090 lck = (kmp_user_lock_p)user_lock;
2091 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002092#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002093 else if ((__kmp_user_lock_kind == lk_futex) &&
2094 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2095 lck = (kmp_user_lock_p)user_lock;
2096 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002097#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002098 else {
2099 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2100 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002101
2102#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002103 __kmp_itt_lock_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002104#endif /* USE_ITT_BUILD */
2105
Jonathan Peyton30419822017-05-12 18:01:32 +00002106 ACQUIRE_LOCK(lck, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002107
2108#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002109 __kmp_itt_lock_acquired(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002110#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002111
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002112#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00002113 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
2114 ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t)lck);
2115 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002116#endif
2117
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002118#endif // KMP_USE_DYNAMIC_LOCK
2119}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120
Jonathan Peyton30419822017-05-12 18:01:32 +00002121void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002122#if KMP_USE_DYNAMIC_LOCK
2123
Jonathan Peyton30419822017-05-12 18:01:32 +00002124#if USE_ITT_BUILD
2125 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2126#endif
2127 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2128#if USE_ITT_BUILD
2129 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002130#endif
2131
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002132#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00002133 if (ompt_enabled) {
2134 // missing support here: need to know whether acquired first or not
2135 }
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002136#endif
2137
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002138#else // KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00002139 int acquire_status;
2140 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002141
Jonathan Peyton30419822017-05-12 18:01:32 +00002142 if ((__kmp_user_lock_kind == lk_tas) &&
2143 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2144 OMP_NEST_LOCK_T_SIZE)) {
2145 lck = (kmp_user_lock_p)user_lock;
2146 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002147#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002148 else if ((__kmp_user_lock_kind == lk_futex) &&
2149 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2150 OMP_NEST_LOCK_T_SIZE)) {
2151 lck = (kmp_user_lock_p)user_lock;
2152 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002153#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002154 else {
2155 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2156 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002157
2158#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002159 __kmp_itt_lock_acquiring(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002160#endif /* USE_ITT_BUILD */
2161
Jonathan Peyton30419822017-05-12 18:01:32 +00002162 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002163
2164#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002165 __kmp_itt_lock_acquired(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002166#endif /* USE_ITT_BUILD */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002167
2168#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00002169 if (ompt_enabled) {
2170 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2171 if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
2172 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)(
2173 (uint64_t)lck);
2174 } else {
2175 if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
2176 ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)(
2177 (uint64_t)lck);
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002178 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002179 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002180#endif
Jonathan Peyton2c295c42015-12-23 02:34:03 +00002181
2182#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002183}
2184
Jonathan Peyton30419822017-05-12 18:01:32 +00002185void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002186#if KMP_USE_DYNAMIC_LOCK
2187
Jonathan Peyton30419822017-05-12 18:01:32 +00002188 int tag = KMP_EXTRACT_D_TAG(user_lock);
2189#if USE_ITT_BUILD
2190 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2191#endif
2192#if KMP_USE_INLINED_TAS
2193 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2194 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2195 } else
2196#elif KMP_USE_INLINED_FUTEX
2197 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2198 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2199 } else
2200#endif
2201 {
2202 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2203 }
2204
2205#else // KMP_USE_DYNAMIC_LOCK
2206
2207 kmp_user_lock_p lck;
2208
2209 /* Can't use serial interval since not block structured */
2210 /* release the lock */
2211
2212 if ((__kmp_user_lock_kind == lk_tas) &&
2213 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2214#if KMP_OS_LINUX && \
2215 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2216// "fast" path implemented to fix customer performance issue
2217#if USE_ITT_BUILD
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002218 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002219#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00002220 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2221 KMP_MB();
2222 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002223#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002224 lck = (kmp_user_lock_p)user_lock;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002225#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002226 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002227#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002228 else if ((__kmp_user_lock_kind == lk_futex) &&
2229 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2230 lck = (kmp_user_lock_p)user_lock;
2231 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002232#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002233 else {
2234 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2235 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002236
2237#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002238 __kmp_itt_lock_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002239#endif /* USE_ITT_BUILD */
2240
Jonathan Peyton30419822017-05-12 18:01:32 +00002241 RELEASE_LOCK(lck, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002242
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002243#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peyton30419822017-05-12 18:01:32 +00002244 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2245 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t)lck);
2246 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002247#endif
2248
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002249#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002250}
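
/* Illustrative lock lifecycle on the simple (non-nested) API (sketch;
   'lck', 'loc' and 'gtid' are hypothetical):

     __kmpc_init_lock(&loc, gtid, (void **)&lck);
     __kmpc_set_lock(&loc, gtid, (void **)&lck); // blocks until acquired
     // ... exclusive work ...
     __kmpc_unset_lock(&loc, gtid, (void **)&lck); // release
     __kmpc_destroy_lock(&loc, gtid, (void **)&lck);
*/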
2251
/* release the nested lock */
Jonathan Peyton30419822017-05-12 18:01:32 +00002253void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002254#if KMP_USE_DYNAMIC_LOCK
2255
Jonathan Peyton30419822017-05-12 18:01:32 +00002256#if USE_ITT_BUILD
2257 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2258#endif
2259 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2260
2261#else // KMP_USE_DYNAMIC_LOCK
2262
2263 kmp_user_lock_p lck;
2264
2265 /* Can't use serial interval since not block structured */
2266
2267 if ((__kmp_user_lock_kind == lk_tas) &&
2268 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2269 OMP_NEST_LOCK_T_SIZE)) {
2270#if KMP_OS_LINUX && \
2271 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2272 // "fast" path implemented to fix customer performance issue
2273 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2274#if USE_ITT_BUILD
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002275 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002276#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00002277 if (--(tl->lk.depth_locked) == 0) {
2278 TCW_4(tl->lk.poll, 0);
2279 }
2280 KMP_MB();
2281 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002282#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002283 lck = (kmp_user_lock_p)user_lock;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002284#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002285 }
Jonathan Peyton9d2412c2016-06-22 16:35:12 +00002286#if KMP_USE_FUTEX
Jonathan Peyton30419822017-05-12 18:01:32 +00002287 else if ((__kmp_user_lock_kind == lk_futex) &&
2288 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2289 OMP_NEST_LOCK_T_SIZE)) {
2290 lck = (kmp_user_lock_p)user_lock;
2291 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002293 else {
2294 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2295 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002296
2297#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002298 __kmp_itt_lock_releasing(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002299#endif /* USE_ITT_BUILD */
2300
Jonathan Peyton30419822017-05-12 18:01:32 +00002301 int release_status;
2302 release_status = RELEASE_NESTED_LOCK(lck, gtid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002303#if OMPT_SUPPORT && OMPT_BLAME
Jonathan Peyton30419822017-05-12 18:01:32 +00002304 if (ompt_enabled) {
2305 if (release_status == KMP_LOCK_RELEASED) {
2306 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2307 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2308 (uint64_t)lck);
2309 }
2310 } else if (ompt_callbacks.ompt_callback(
2311 ompt_event_release_nest_lock_prev)) {
2312 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2313 (uint64_t)lck);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002314 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002315 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002316#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002317
2318#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002319}
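
/* Illustrative sketch (not part of the runtime; hypothetical user code): the
   entry point above is reached through omp_unset_nest_lock(), and the depth
   counting it performs mirrors the depth_locked handling in the fast path:

     omp_nest_lock_t l;
     omp_init_nest_lock(&l);
     omp_set_nest_lock(&l);   // depth 1: lock acquired
     omp_set_nest_lock(&l);   // depth 2: same owner nests again
     omp_unset_nest_lock(&l); // depth 1: still held
     omp_unset_nest_lock(&l); // depth 0: released, other threads may acquire
     omp_destroy_nest_lock(&l);
*/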

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
  return (rc ? FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
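
/* Illustrative sketch (hypothetical user code): the FTN_TRUE/FTN_FALSE result
   above surfaces as the int returned by omp_test_lock(), enabling a
   non-blocking acquire-or-do-something-else pattern:

     omp_lock_t l;
     omp_init_lock(&l);
     if (omp_test_lock(&l)) {
       // lock acquired without blocking; do the protected work
       omp_unset_lock(&l);
     } else {
       // lock was busy; do other useful work instead of waiting
     }
*/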

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
  return rc;

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
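
/* Note and sketch (hypothetical user code): per the OpenMP API,
   omp_test_nest_lock() returns the new nesting count on success and 0 on
   failure, which is presumably why rc is forwarded unchanged above instead
   of being collapsed to FTN_TRUE/FTN_FALSE as in __kmpc_test_lock():

     omp_nest_lock_t l;
     omp_init_nest_lock(&l);
     int d1 = omp_test_nest_lock(&l); // 1 if acquired, 0 if busy
     int d2 = omp_test_nest_lock(&l); // 2: owner nests again
     omp_unset_nest_lock(&l);
     omp_unset_nest_lock(&l);
*/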

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h
Jim Cownie5e8470a2013-09-27 10:38:44 +00002465
2466// used in a critical section reduce block
2467static __forceinline void
Jonathan Peyton30419822017-05-12 18:01:32 +00002468__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
2469 kmp_critical_name *crit) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002470
Jonathan Peyton30419822017-05-12 18:01:32 +00002471 // this lock was visible to a customer and to the threading profile tool as a
2472 // serial overhead span (although it's used for an internal purpose only)
2473 // why was it visible in previous implementation?
2474 // should we keep it visible in new reduce block?
2475 kmp_user_lock_p lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002476
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002477#if KMP_USE_DYNAMIC_LOCK
2478
Jonathan Peyton30419822017-05-12 18:01:32 +00002479 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
2480 // Check if it is initialized.
2481 if (*lk == 0) {
2482 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
2483 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
2484 KMP_GET_D_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002485 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002486 __kmp_init_indirect_csptr(crit, loc, global_tid,
2487 KMP_GET_I_TAG(__kmp_user_lock_seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002488 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002489 }
2490 // Branch for accessing the actual lock object and set operation. This
2491 // branching is inevitable since this lock initialization does not follow the
2492 // normal dispatch path (lock table is not used).
2493 if (KMP_EXTRACT_D_TAG(lk) != 0) {
2494 lck = (kmp_user_lock_p)lk;
2495 KMP_DEBUG_ASSERT(lck != NULL);
2496 if (__kmp_env_consistency_check) {
2497 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2498 }
2499 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
2500 } else {
2501 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
2502 lck = ilk->lock;
2503 KMP_DEBUG_ASSERT(lck != NULL);
2504 if (__kmp_env_consistency_check) {
2505 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2506 }
2507 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
2508 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002509
2510#else // KMP_USE_DYNAMIC_LOCK
2511
Jonathan Peyton30419822017-05-12 18:01:32 +00002512 // We know that the fast reduction code is only emitted by Intel compilers
2513 // with 32 byte critical sections. If there isn't enough space, then we
2514 // have to use a pointer.
2515 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
2516 lck = (kmp_user_lock_p)crit;
2517 } else {
2518 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
2519 }
2520 KMP_DEBUG_ASSERT(lck != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002521
Jonathan Peyton30419822017-05-12 18:01:32 +00002522 if (__kmp_env_consistency_check)
2523 __kmp_push_sync(global_tid, ct_critical, loc, lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002524
Jonathan Peyton30419822017-05-12 18:01:32 +00002525 __kmp_acquire_user_lock_with_checks(lck, global_tid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002526
2527#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002528}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
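
/* Minimal sketch of the storage decision above (non-dynamic-lock path; member
   names below are illustrative, not real declarations): a kmp_critical_name
   is a small compiler-reserved buffer next to the reduce block. If the
   configured user lock fits (size <= INTEL_CRITICAL_SIZE, i.e. 32 bytes), the
   lock object lives directly in that buffer; otherwise the buffer's first
   pointer-sized slot holds a pointer to a separately allocated lock, which is
   what the two branches above read back:

     union crit_view { // conceptual view of the kmp_critical_name bytes
       char inline_lock[32];     // lock stored in place when it fits
       kmp_user_lock_p lock_ptr; // otherwise: pointer set up on first entry
     };
*/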

/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_team_t *team;
  kmp_info_t *th;
  int teams_swapped = 0, task_state;
#endif
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  if (th->th.th_teams_microtask) { // AC: check if we are inside the teams
    // construct?
    team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // this is reduction at teams construct
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction barrier
      teams_swapped = 1;
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
    }
  }
#endif // OMP_40_ENABLED

  // The packed_reduction_method value will be reused by the __kmpc_end_reduce*
  // functions, so it must be kept in a variable. The variable should be a
  // construct-specific or thread-specific property, not a team-specific one
  // (a thread can reach the next reduce block on the next construct, and the
  // reduce method may differ on the next construct).
  // An ident_t "loc" parameter could be used as a construct-specific property
  // (but what if loc == 0?).
  // (If both construct-specific and team-specific variables were shared, then
  // unnecessary extra syncs would be needed.)
  // A thread-specific variable is better regarding the two issues above (next
  // construct and extra syncs); a thread-specific "th_local.reduction_method"
  // variable is used currently. Each thread executes the 'determine' and 'set'
  // lines (there is no need to execute them on one thread only, which would
  // require extra syncs).

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (this is not quite correct, because the checking block has been closed
    // by this 'pop' while the atomic operation has not been executed yet; it
    // will be executed slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// AT: performance issue: a real barrier here
// AT: (if the master goes slowly, other threads are blocked here waiting for
// the master to come and release them)
// AT: (this is not what a customer might expect when specifying the NOWAIT
// clause: NOWAIT won't improve performance here, which will be confusing)
// AT: another implementation of *barrier_gather*nowait() (or some other
// design) might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: do epcc test and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's
// used for an internal purpose)
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);

    // all workers except the master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    // Restore thread structure
    th->th.th_info.ds.ds_tid = 0;
    th->th.th_team = team;
    th->th.th_team_nproc = team->t.t_nproc;
    th->th.th_task_team = team->t.t_task_team[task_state];
    th->th.th_task_state = task_state;
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
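
/* Illustrative codegen sketch (hypothetical caller, not emitted by this
   file): for a clause such as "#pragma omp for reduction(+:sum) nowait" a
   compiler typically dispatches on the documented return value. Note that
   __kmpc_end_reduce_nowait() is reached only on the '1' path; on the '2'
   (atomic) path the end call is not generated, matching the comments above.

     switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(sum), &rdata,
                                  my_combiner, &crit)) {
     case 1: // critical/empty method, or the master on the tree method
       sum += sum_local; // combine the partial result directly
       __kmpc_end_reduce_nowait(&loc, gtid, &crit);
       break;
     case 2: // atomic method: every thread combines atomically, no end call
       // (e.g. an atomic add of sum_local into sum)
       break;
     default: // 0: tree-method workers whose contribution was already merged
       break;
     }
*/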

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it's better to remove this elseif at all;
    // after removal this value will be checked by the 'else' and will assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // why do we need this initialization here at all?
  // Reduction clause cannot be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);

    // all workers except the master should do this pop here
    // (none of the other workers will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
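
/* Contract sketch for the blocking variant (informal, inferred from the
   branches of __kmpc_end_reduce() below): the caller-side dispatch is the
   same as in the __kmpc_reduce_nowait() sketch above, except that
   __kmpc_end_reduce() must also be called on the '2' (atomic) path, because
   for the critical, empty, and atomic methods the terminating barrier lives
   in __kmpc_end_reduce() and every thread has to reach it. Only the
   tree-reduce method synchronizes inside __kmp_barrier(), with the master
   releasing the workers via __kmp_end_split_barrier(). */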

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size==1, no synchronization is required (Intel platforms only)

// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);

  } else if (packed_reduction_method == atomic_reduce_block) {

// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
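
/* Minimal usage sketch (hypothetical): these entry points let a tool or a
   debugger sample task identity from any thread; on a thread not managed by
   this runtime (gtid < 0) both return 0.

     kmp_uint64 task = __kmpc_get_taskid();
     kmp_uint64 parent = __kmpc_get_parent_taskid(); // 0 if no parent task
*/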

#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loops bounds.

Initialize doacross loop information.
Expect the compiler to send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
                                       // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by other loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (kmp_int64 *)&sh_buf->doacross_flags, NULL, (kmp_int64)1);
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    kmp_int64 size =
        trace_count / 8 + 8; // in bytes, use single bit per iteration
    sh_buf->doacross_flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
  } else if ((kmp_int64)flags == 1) {
    // initialization is still in progress, need to wait
    while ((volatile kmp_int64)sh_buf->doacross_flags == 1) {
      KMP_YIELD(TRUE);
    }
  }
  KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags >
                   1); // check value of pointer
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
                              // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
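
/* Illustrative sketch of the expected call sequence (compiler-generated in
   practice; the user loop and variable names here are hypothetical). For a
   one-dimensional doacross loop such as

     #pragma omp for ordered(1)
     for (i = 2; i < 9; i += 2) {
       #pragma omp ordered depend(sink : i - 2)
       ... consume the result of iteration i-2 ...
       #pragma omp ordered depend(source)
     }

   each thread would execute roughly:

     struct kmp_dim dim;
     dim.lo = 2; // inclusive bounds, as documented above
     dim.up = 8;
     dim.st = 2;
     __kmpc_doacross_init(&loc, gtid, 1, &dim);
     // per assigned iteration i:
     long long sink_vec = i - 2;
     __kmpc_doacross_wait(&loc, gtid, &sink_vec); // block until i-2 posted
     long long src_vec = i;
     __kmpc_doacross_post(&loc, gtid, &src_vec); // mark i as completed
     // after the loop:
     __kmpc_doacross_fini(&loc, gtid);
*/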

void __kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
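
/* Worked example of the flag arithmetic above (values hypothetical): with a
   two-dimensional nest whose second dimension keeps range ln = 10, the
   vector {3, 7} (with lo = 0, st = 1 in both dimensions) linearizes to
   iter_number = 7 + 10 * 3 = 37. The waiter then polls bit 37 % 32 = 5 of
   32-bit word 37 >> 5 = 1, i.e. it spins until th_doacross_flags[1] has
   (1 << 5) set by the matching __kmpc_doacross_post() call. */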

void __kmpc_doacross_post(ident_t *loc, int gtid, long long *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
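
/* Note: post is the producer half of the wait sketch above. It computes the
   same linearized iter_number (minus the bounds checks) and sets the same
   bit, using an atomic OR only when the bit is not already visible, so any
   thread polling that bit in __kmpc_doacross_wait() is released once the
   iteration it depends on has been posted. */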

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int64 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC64((kmp_int64 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
#endif

// end of file //