/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
// we just define the constants here and don't include <futex.h>
# ifndef FUTEX_WAIT
# define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
# define FUTEX_WAKE 1
# endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32 x, y;

    /* Check to make sure unsigned arithmetic wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}


/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//

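//
// The TAS lock keeps its entire state in the 32-bit lk.poll word:
// KMP_LOCK_FREE(tas) (zero) means the lock is free, and KMP_LOCK_BUSY(gtid+1, tas)
// identifies the owning thread, which is why the owner query below strips the
// tag bits and subtracts 1 (returning -1 when the lock is free).
//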
static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
        //
        // FIXME - use exponential backoff here
        //
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_tas_lock_timed_template( lck, gtid );
}

static int
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_tas_lock( lck, gtid );
}

int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}

int
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();  /* Flush all pending memory write invalidates. */

    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
    KMP_MB();  /* Flush all pending memory write invalidates. */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_tas_lock( lck, gtid );
}

void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
}

static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}

void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}


//
// nested test and set locks
//

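//
// For nestable locks, lk.depth_locked counts how many times the owning thread
// has acquired the lock; it is kept at -1 for simple (non-nestable) locks,
// which is what the *_is_*_lock_nestable() helpers test.
//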
int
__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_tas_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_tas_lock( lck, gtid );
}

int
__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}

int
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_tas_lock( lck, gtid );
}

void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}

void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}


#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention. They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

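//
// The futex lock's poll word encodes the owner as (gtid+1) << 1; the low bit
// is set by a contending thread before it blocks in FUTEX_WAIT, so the
// releasing thread knows it must issue a FUTEX_WAKE. An uncontended
// acquire/release therefore never enters the kernel.
//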
375static kmp_int32
376__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
377{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000378 return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000379}
380
381static inline bool
382__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
383{
384 return lck->lk.depth_locked != -1;
385}
386
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000387__forceinline static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000388__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
389{
390 kmp_int32 gtid_code = ( gtid + 1 ) << 1;
391
392 KMP_MB();
393
394#ifdef USE_LOCK_PROFILE
395 kmp_uint32 curr = TCR_4( lck->lk.poll );
396 if ( ( curr != 0 ) && ( curr != gtid_code ) )
397 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
398 /* else __kmp_printf( "." );*/
399#endif /* USE_LOCK_PROFILE */
400
401 KMP_FSYNC_PREPARE( lck );
402 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
403 lck, lck->lk.poll, gtid ) );
404
405 kmp_int32 poll_val;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000406
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000407 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
408 KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000409
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000410 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000411 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
412 lck, gtid, poll_val, cond ) );
413
414 //
415 // NOTE: if you try to use the following condition for this branch
416 //
417 // if ( poll_val & 1 == 0 )
418 //
419 // Then the 12.0 compiler has a bug where the following block will
420 // always be skipped, regardless of the value of the LSB of poll_val.
421 //
422 if ( ! cond ) {
423 //
424 // Try to set the lsb in the poll to indicate to the owner
425 // thread that they need to wake this thread up.
426 //
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000427 if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000428 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
429 lck, lck->lk.poll, gtid ) );
430 continue;
431 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000432 poll_val |= KMP_LOCK_BUSY(1, futex);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000433
434 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
435 lck, lck->lk.poll, gtid ) );
436 }
437
438 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
439 lck, gtid, poll_val ) );
440
441 kmp_int32 rc;
442 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
443 poll_val, NULL, NULL, 0 ) ) != 0 ) {
444 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
445 lck, gtid, poll_val, rc, errno ) );
446 continue;
447 }
448
449 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
450 lck, gtid, poll_val ) );
451 //
Alp Toker8f2d3f02014-02-24 10:40:15 +0000452 // This thread has now done a successful futex wait call and was
Jim Cownie5e8470a2013-09-27 10:38:44 +0000453 // entered on the OS futex queue. We must now perform a futex
454 // wake call when releasing the lock, as we have no idea how many
455 // other threads are in the queue.
456 //
457 gtid_code |= 1;
458 }
459
460 KMP_FSYNC_ACQUIRED( lck );
461 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
462 lck, lck->lk.poll, gtid ) );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000463 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000464}
465
int
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_futex_lock_timed_template( lck, gtid );
}

static int
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_futex_lock( lck, gtid );
}

int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}

int
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();  /* Flush all pending memory write invalidates. */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);

    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
      lck, gtid, poll_val ) );

    if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
          lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
    }

    KMP_MB();  /* Flush all pending memory write invalidates. */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_futex_lock( lck, gtid );
}

void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
}

static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}

void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}


//
// nested futex locks
//

int
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_futex_lock( lck, gtid );
}

int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}

int
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_futex_lock( lck, gtid );
}

void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}

void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)


/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

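//
// A ticket lock hands out monotonically increasing tickets with an atomic
// fetch-and-increment of lk.next_ticket; each thread then spins until
// lk.now_serving reaches its own ticket, which gives FIFO ordering among
// contending threads.
//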
static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

static kmp_uint32
__kmp_bakery_check(kmp_uint value, kmp_uint checker)
{
    register kmp_uint32 pause;

    if (value == checker) {
        return TRUE;
    }
    for (pause = checker - value; pause != 0; --pause);
    return FALSE;
}

__forceinline static int
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket;
    KMP_MB();

    my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );

#ifdef USE_LOCK_PROFILE
    if ( TCR_4( lck->lk.now_serving ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST;
    }
    KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
}

static int
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
          my_ticket, next_ticket ) ) {
            KMP_FSYNC_ACQUIRED( lck );
            return TRUE;
        }
    }
    return FALSE;
}

static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

int
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 distance;

    KMP_MB();  /* Flush all pending memory write invalidates. */

    KMP_FSYNC_RELEASING(lck);
    distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );

    KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );

    KMP_MB();  /* Flush all pending memory write invalidates. */

    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_ticket_lock( lck, gtid );
}

void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    TCW_4( lck->lk.next_ticket, 0 );
    TCW_4( lck->lk.now_serving, 0 );
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // -1 => not a nested lock.
    lck->lk.initialized = (kmp_ticket_lock *)lck;
}

static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}

void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}


//
// nested ticket locks
//

int
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_ticket_lock( lck, gtid );
}

int
__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}

int
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_ticket_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_ticket_lock( lck, gtid );
}

void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_nested_ticket_lock( lck );
}

void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_ticket_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

static const ident_t *
__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/*
 * First the states
 * (head,tail) =              0, 0  means lock is unheld, nobody on queue
 *              UINT_MAX or -1, 0   means lock is held, nobody on queue
 *                           h, h   means lock is held or about to transition, 1 element on queue
 *                           h, t   h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *    Acquire(0,0)  = -1 ,0
 *    Release(0,0)  = Error
 *    Acquire(-1,0) =  h ,h    h > 0
 *    Release(-1,0) =  0 ,0
 *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *    Release(h,h)  = -1 ,0    h > 0
 *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *             ^                                      |
 *             |                                      |
 *             +--------------------------------------+
 *
 */

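//
// In the queue, head_id and tail_id hold gtid+1 of the first and last waiting
// threads (0 = empty, head_id == -1 = lock held with an empty queue), and each
// waiter spins on its own th.th_spin_here flag until the releasing thread
// dequeues it, so spinning stays local to each thread.
//
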
#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
static int tc = 0;
#define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z );
#define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );

static void
__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
{
    kmp_int32 t, i;

    __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );

    i = tc % TRACE_BUF_ELE;
    __kmp_printf_no_lock( "%s\n", traces[i] );
    i = (i+1) % TRACE_BUF_ELE;
    while ( i != (tc % TRACE_BUF_ELE) ) {
        __kmp_printf_no_lock( "%s", traces[i] );
        i = (i+1) % TRACE_BUF_ELE;
    }
    __kmp_printf_no_lock( "\n" );

    __kmp_printf_no_lock(
        "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
        gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
        head_id, tail_id );

    __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );

    if ( lck->lk.head_id >= 1 ) {
        t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
        while (t > 0) {
            __kmp_printf_no_lock( "-> %d ", t );
            t = __kmp_threads[t-1]->th.th_next_waiting;
        }
    }
    __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
    __kmp_printf_no_lock( "\n\n" );
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32
__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    register kmp_info_t *this_thr  = __kmp_thread_from_gtid( gtid );
    volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
    volatile kmp_int32 *tail_id_p  = & lck->lk.tail_id;
    volatile kmp_uint32 *spin_here_p;
    kmp_int32 need_mf = 1;

#if OMPT_SUPPORT
    ompt_state_t prev_state = ompt_state_undefined;
#endif

    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));

    KMP_FSYNC_PREPARE( lck );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    spin_here_p = & this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "acq ent" );
    if ( *spin_here_p )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !*spin_here_p );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );


    /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
       that may follow, not just in execution order, but also in visibility order. This way,
       when a releasing thread observes the changes to the queue by this thread, it can
       rightly assume that spin_here_p has already been set to TRUE, so that when it sets
       spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
       to FALSE before this thread sets it to TRUE, this thread will hang.
    */
    *spin_here_p = TRUE;  /* before enqueuing to prevent race */

    while( 1 ) {
        kmp_int32 enqueued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

        switch ( head ) {

            case -1:
            {
#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                              this assignment prevents us from entering the if ( t > 0 )
                              condition in the enqueued case below, which is not necessary for
                              this state transition */

                need_mf = 0;
                /* try (-1,0)->(tid,tid) */
                enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
                  KMP_PACK_64( -1, 0 ),
                  KMP_PACK_64( gtid+1, gtid+1 ) );
#ifdef DEBUG_QUEUING_LOCKS
                if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
#endif
            }
            break;

            default:
            {
                tail = *tail_id_p;
                KMP_DEBUG_ASSERT( tail != gtid + 1 );

#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif

                if ( tail == 0 ) {
                    enqueued = FALSE;
                }
                else {
                    need_mf = 0;
                    /* try (h,t) or (h,h)->(h,tid) */
                    enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );

#ifdef DEBUG_QUEUING_LOCKS
                    if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
#endif
                }
            }
            break;

            case 0: /* empty queue */
            {
                kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                /* try (0,0)->(-1,0) */

                /* only legal transition out of head = 0 is head = -1 with no change to tail */
                grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );

                if ( grabbed_lock ) {

                    *spin_here_p = FALSE;

                    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
                      lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                    TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif

#if OMPT_SUPPORT
                    if (ompt_enabled && prev_state != ompt_state_undefined) {
                        /* change the state before clearing wait_id */
                        this_thr->th.ompt_thread_info.state = prev_state;
                        this_thr->th.ompt_thread_info.wait_id = 0;
                    }
#endif

                    KMP_FSYNC_ACQUIRED( lck );
                    return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
                }
                enqueued = FALSE;
            }
            break;
        }

#if OMPT_SUPPORT
        if (ompt_enabled && prev_state == ompt_state_undefined) {
            /* this thread will spin; set wait_id before entering wait state */
            prev_state = this_thr->th.ompt_thread_info.state;
            this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
            this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
        }
#endif

        if ( enqueued ) {
            if ( tail > 0 ) {
                kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
                KMP_ASSERT( tail_thr != NULL );
                tail_thr->th.th_next_waiting = gtid+1;
                /* corresponding wait for this write in release code */
            }
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));


            /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
             *       throughput only here.
             */
            KMP_MB();
            KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq spin" );

            if ( this_thr->th.th_next_waiting != 0 )
                __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
            KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
              lck, gtid ));

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq exit 2" );
#endif

#if OMPT_SUPPORT
            /* change the state before clearing wait_id */
            this_thr->th.ompt_thread_info.state = prev_state;
            this_thr->th.ompt_thread_info.wait_id = 0;
#endif

            /* got lock, we were dequeued by the thread that released lock */
            return KMP_LOCK_ACQUIRED_FIRST;
        }

        /* Yield if number of threads > number of logical processors */
        /* ToDo: Not sure why this should only be in oversubscription case,
           maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
        KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) );
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "acq retry" );
#endif

    }
    KMP_ASSERT2( 0, "should not get here" );
    return KMP_LOCK_ACQUIRED_FIRST;
}

Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001406int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001407__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1408{
1409 KMP_DEBUG_ASSERT( gtid >= 0 );
1410
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001411 return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001412}
1413
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001414static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001415__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1416 kmp_int32 gtid )
1417{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001418 char const * const func = "omp_set_lock";
1419 if ( lck->lk.initialized != lck ) {
1420 KMP_FATAL( LockIsUninitialized, func );
1421 }
1422 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1423 KMP_FATAL( LockNestableUsedAsSimple, func );
1424 }
1425 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1426 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001427 }
1428
1429 __kmp_acquire_queuing_lock( lck, gtid );
1430
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001431 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001432 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001433}
1434
1435int
1436__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1437{
1438 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1439 kmp_int32 head;
1440#ifdef KMP_DEBUG
1441 kmp_info_t *this_thr;
1442#endif
1443
1444 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1445 KMP_DEBUG_ASSERT( gtid >= 0 );
1446#ifdef KMP_DEBUG
1447 this_thr = __kmp_thread_from_gtid( gtid );
1448 KMP_DEBUG_ASSERT( this_thr != NULL );
1449 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1450#endif
1451
1452 head = *head_id_p;
1453
1454 if ( head == 0 ) { /* nobody on queue, nobody holding */
1455
1456 /* try (0,0)->(-1,0) */
1457
1458 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1459 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1460 KMP_FSYNC_ACQUIRED(lck);
1461 return TRUE;
1462 }
1463 }
1464
1465 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1466 return FALSE;
1467}
1468
1469static int
1470__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1471{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001472 char const * const func = "omp_test_lock";
1473 if ( lck->lk.initialized != lck ) {
1474 KMP_FATAL( LockIsUninitialized, func );
1475 }
1476 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1477 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001478 }
1479
1480 int retval = __kmp_test_queuing_lock( lck, gtid );
1481
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001482 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001483 lck->lk.owner_id = gtid + 1;
1484 }
1485 return retval;
1486}
1487
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001488int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001489__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1490{
1491 register kmp_info_t *this_thr;
1492 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1493 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1494
1495 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1496 KMP_DEBUG_ASSERT( gtid >= 0 );
1497 this_thr = __kmp_thread_from_gtid( gtid );
1498 KMP_DEBUG_ASSERT( this_thr != NULL );
1499#ifdef DEBUG_QUEUING_LOCKS
1500 TRACE_LOCK( gtid+1, "rel ent" );
1501
1502 if ( this_thr->th.th_spin_here )
1503 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1504 if ( this_thr->th.th_next_waiting != 0 )
1505 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1506#endif
1507 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1508 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1509
1510 KMP_FSYNC_RELEASING(lck);
1511
1512 while( 1 ) {
1513 kmp_int32 dequeued;
1514 kmp_int32 head;
1515 kmp_int32 tail;
1516
1517 head = *head_id_p;
1518
1519#ifdef DEBUG_QUEUING_LOCKS
1520 tail = *tail_id_p;
1521 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1522 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1523#endif
1524 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1525
1526 if ( head == -1 ) { /* nobody on queue */
1527
1528 /* try (-1,0)->(0,0) */
1529 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1530 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1531 lck, gtid ));
1532#ifdef DEBUG_QUEUING_LOCKS
1533 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1534#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001535
1536#if OMPT_SUPPORT
1537 /* nothing to do - no other thread is trying to shift blame */
1538#endif
1539
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001540 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001541 }
1542 dequeued = FALSE;
1543
1544 }
1545 else {
1546
1547 tail = *tail_id_p;
1548 if ( head == tail ) { /* only one thread on the queue */
1549
1550#ifdef DEBUG_QUEUING_LOCKS
1551 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1552#endif
1553 KMP_DEBUG_ASSERT( head > 0 );
1554
1555 /* try (h,h)->(-1,0) */
1556 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1557 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1558#ifdef DEBUG_QUEUING_LOCKS
1559 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1560#endif
1561
1562 }
1563 else {
1564 volatile kmp_int32 *waiting_id_p;
1565 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1566 KMP_DEBUG_ASSERT( head_thr != NULL );
1567 waiting_id_p = & head_thr->th.th_next_waiting;
1568
1569 /* Does this require synchronous reads? */
1570#ifdef DEBUG_QUEUING_LOCKS
1571 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1572#endif
1573 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1574
1575 /* try (h,t)->(h',t) or (t,t) */
1576
1577 KMP_MB();
1578 /* make sure enqueuing thread has time to update next waiting thread field */
1579 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1580#ifdef DEBUG_QUEUING_LOCKS
1581 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1582#endif
1583 dequeued = TRUE;
1584 }
1585 }
1586
1587 if ( dequeued ) {
1588 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1589 KMP_DEBUG_ASSERT( head_thr != NULL );
1590
1591 /* Does this require synchronous reads? */
1592#ifdef DEBUG_QUEUING_LOCKS
1593 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1594#endif
1595 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1596
 1597 /* For code cleanliness only.
 1598 * The thread is not actually released until the next statement, which prevents a race with the acquire code.
1599 */
1600 head_thr->th.th_next_waiting = 0;
1601#ifdef DEBUG_QUEUING_LOCKS
1602 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1603#endif
1604
1605 KMP_MB();
1606 /* reset spin value */
1607 head_thr->th.th_spin_here = FALSE;
1608
1609 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1610 lck, gtid ));
1611#ifdef DEBUG_QUEUING_LOCKS
1612 TRACE_LOCK( gtid+1, "rel exit 2" );
1613#endif
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001614 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001615 }
1616 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1617
1618#ifdef DEBUG_QUEUING_LOCKS
1619 TRACE_LOCK( gtid+1, "rel retry" );
1620#endif
1621
1622 } /* while */
1623 KMP_ASSERT2( 0, "should not get here" );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001624 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001625}
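// Summary of the state transitions attempted by the release path above:
//   (-1, 0) -> ( 0, 0)   no waiters: the lock simply becomes free.
//   ( h, h) -> (-1, 0)   exactly one waiter: the queue empties and that waiter
//                        becomes the new owner.
//   ( h, t) -> ( h', t)  several waiters: the head thread is dequeued, its
//                        successor h' becomes the new head, and the dequeued
//                        thread now owns the lock.
// A dequeued waiter only proceeds once its th_spin_here flag is cleared.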
1626
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001627static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001628__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1629 kmp_int32 gtid )
1630{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001631 char const * const func = "omp_unset_lock";
1632 KMP_MB(); /* in case another processor initialized lock */
1633 if ( lck->lk.initialized != lck ) {
1634 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001635 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001636 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1637 KMP_FATAL( LockNestableUsedAsSimple, func );
1638 }
1639 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1640 KMP_FATAL( LockUnsettingFree, func );
1641 }
1642 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1643 KMP_FATAL( LockUnsettingSetByAnother, func );
1644 }
1645 lck->lk.owner_id = 0;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001646 return __kmp_release_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001647}
1648
1649void
1650__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1651{
1652 lck->lk.location = NULL;
1653 lck->lk.head_id = 0;
1654 lck->lk.tail_id = 0;
1655 lck->lk.next_ticket = 0;
1656 lck->lk.now_serving = 0;
1657 lck->lk.owner_id = 0; // no thread owns the lock.
1658 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1659 lck->lk.initialized = lck;
1660
1661 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1662}
1663
1664static void
1665__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1666{
1667 __kmp_init_queuing_lock( lck );
1668}
1669
1670void
1671__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1672{
1673 lck->lk.initialized = NULL;
1674 lck->lk.location = NULL;
1675 lck->lk.head_id = 0;
1676 lck->lk.tail_id = 0;
1677 lck->lk.next_ticket = 0;
1678 lck->lk.now_serving = 0;
1679 lck->lk.owner_id = 0;
1680 lck->lk.depth_locked = -1;
1681}
1682
1683static void
1684__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1685{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001686 char const * const func = "omp_destroy_lock";
1687 if ( lck->lk.initialized != lck ) {
1688 KMP_FATAL( LockIsUninitialized, func );
1689 }
1690 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1691 KMP_FATAL( LockNestableUsedAsSimple, func );
1692 }
1693 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1694 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695 }
1696 __kmp_destroy_queuing_lock( lck );
1697}
1698
1699
1700//
1701// nested queuing locks
1702//
1703
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001704int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001705__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1706{
1707 KMP_DEBUG_ASSERT( gtid >= 0 );
1708
1709 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1710 lck->lk.depth_locked += 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001711 return KMP_LOCK_ACQUIRED_NEXT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001712 }
1713 else {
1714 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1715 KMP_MB();
1716 lck->lk.depth_locked = 1;
1717 KMP_MB();
1718 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001719 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001720 }
1721}
1722
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001723static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001724__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1725{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001726 char const * const func = "omp_set_nest_lock";
1727 if ( lck->lk.initialized != lck ) {
1728 KMP_FATAL( LockIsUninitialized, func );
1729 }
1730 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1731 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001732 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001733 return __kmp_acquire_nested_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001734}
1735
1736int
1737__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1738{
1739 int retval;
1740
1741 KMP_DEBUG_ASSERT( gtid >= 0 );
1742
1743 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1744 retval = ++lck->lk.depth_locked;
1745 }
1746 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1747 retval = 0;
1748 }
1749 else {
1750 KMP_MB();
1751 retval = lck->lk.depth_locked = 1;
1752 KMP_MB();
1753 lck->lk.owner_id = gtid + 1;
1754 }
1755 return retval;
1756}
1757
1758static int
1759__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1760 kmp_int32 gtid )
1761{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001762 char const * const func = "omp_test_nest_lock";
1763 if ( lck->lk.initialized != lck ) {
1764 KMP_FATAL( LockIsUninitialized, func );
1765 }
1766 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1767 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001768 }
1769 return __kmp_test_nested_queuing_lock( lck, gtid );
1770}
1771
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001772int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001773__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1774{
1775 KMP_DEBUG_ASSERT( gtid >= 0 );
1776
1777 KMP_MB();
1778 if ( --(lck->lk.depth_locked) == 0 ) {
1779 KMP_MB();
1780 lck->lk.owner_id = 0;
1781 __kmp_release_queuing_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001782 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001783 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001784 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001785}
1786
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001787static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001788__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1789{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001790 char const * const func = "omp_unset_nest_lock";
1791 KMP_MB(); /* in case another processor initialized lock */
1792 if ( lck->lk.initialized != lck ) {
1793 KMP_FATAL( LockIsUninitialized, func );
1794 }
1795 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1796 KMP_FATAL( LockSimpleUsedAsNestable, func );
1797 }
1798 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1799 KMP_FATAL( LockUnsettingFree, func );
1800 }
1801 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1802 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001803 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001804 return __kmp_release_nested_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001805}
1806
1807void
1808__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1809{
1810 __kmp_init_queuing_lock( lck );
1811 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1812}
1813
1814static void
1815__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1816{
1817 __kmp_init_nested_queuing_lock( lck );
1818}
1819
1820void
1821__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1822{
1823 __kmp_destroy_queuing_lock( lck );
1824 lck->lk.depth_locked = 0;
1825}
1826
1827static void
1828__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1829{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001830 char const * const func = "omp_destroy_nest_lock";
1831 if ( lck->lk.initialized != lck ) {
1832 KMP_FATAL( LockIsUninitialized, func );
1833 }
1834 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1835 KMP_FATAL( LockSimpleUsedAsNestable, func );
1836 }
1837 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1838 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001839 }
1840 __kmp_destroy_nested_queuing_lock( lck );
1841}
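// A minimal sketch (kept out of the build) of the nested queuing-lock
// protocol, showing the return-value contract of the functions above. The
// function name is hypothetical and exists only for illustration.
#if 0
static void
__kmp_nested_queuing_lock_sketch( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    __kmp_init_nested_queuing_lock( lck );            // depth_locked == 0
    __kmp_acquire_nested_queuing_lock( lck, gtid );   // KMP_LOCK_ACQUIRED_FIRST, depth 1
    __kmp_acquire_nested_queuing_lock( lck, gtid );   // KMP_LOCK_ACQUIRED_NEXT,  depth 2
    __kmp_release_nested_queuing_lock( lck, gtid );   // KMP_LOCK_STILL_HELD,     depth 1
    __kmp_release_nested_queuing_lock( lck, gtid );   // KMP_LOCK_RELEASED,       depth 0
    __kmp_destroy_nested_queuing_lock( lck );
}
#endif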
1842
1843
1844//
1845// access functions to fields which don't exist for all lock kinds.
1846//
1847
1848static int
1849__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1850{
1851 return lck == lck->lk.initialized;
1852}
1853
1854static const ident_t *
1855__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1856{
1857 return lck->lk.location;
1858}
1859
1860static void
1861__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1862{
1863 lck->lk.location = loc;
1864}
1865
1866static kmp_lock_flags_t
1867__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1868{
1869 return lck->lk.flags;
1870}
1871
1872static void
1873__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1874{
1875 lck->lk.flags = flags;
1876}
1877
1878#if KMP_USE_ADAPTIVE_LOCKS
1879
1880/*
1881 RTM Adaptive locks
1882*/
1883
1884// TODO: Use the header for intrinsics below with the compiler 13.0
1885//#include <immintrin.h>
1886
1887// Values from the status register after failed speculation.
1888#define _XBEGIN_STARTED (~0u)
1889#define _XABORT_EXPLICIT (1 << 0)
1890#define _XABORT_RETRY (1 << 1)
1891#define _XABORT_CONFLICT (1 << 2)
1892#define _XABORT_CAPACITY (1 << 3)
1893#define _XABORT_DEBUG (1 << 4)
1894#define _XABORT_NESTED (1 << 5)
1895#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1896
1897// Aborts for which it's worth trying again immediately
1898#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1899
1900#define STRINGIZE_INTERNAL(arg) #arg
1901#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1902
1903// Access to RTM instructions
1904
1905/*
1906 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1907 This is the same definition as the compiler intrinsic that will be supported at some point.
1908*/
1909static __inline int _xbegin()
1910{
1911 int res = -1;
1912
1913#if KMP_OS_WINDOWS
1914#if KMP_ARCH_X86_64
1915 _asm {
1916 _emit 0xC7
1917 _emit 0xF8
1918 _emit 2
1919 _emit 0
1920 _emit 0
1921 _emit 0
1922 jmp L2
1923 mov res, eax
1924 L2:
1925 }
1926#else /* IA32 */
1927 _asm {
1928 _emit 0xC7
1929 _emit 0xF8
1930 _emit 2
1931 _emit 0
1932 _emit 0
1933 _emit 0
1934 jmp L2
1935 mov res, eax
1936 L2:
1937 }
1938#endif // KMP_ARCH_X86_64
1939#else
1940 /* Note that %eax must be noted as killed (clobbered), because
1941 * the XSR is returned in %eax(%rax) on abort. Other register
1942 * values are restored, so don't need to be killed.
1943 *
1944 * We must also mark 'res' as an input and an output, since otherwise
1945 * 'res=-1' may be dropped as being dead, whereas we do need the
1946 * assignment on the successful (i.e., non-abort) path.
1947 */
1948 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1949 " .long 1f-1b-6\n"
1950 " jmp 2f\n"
1951 "1: movl %%eax,%0\n"
1952 "2:"
1953 :"+r"(res)::"memory","%eax");
1954#endif // KMP_OS_WINDOWS
1955 return res;
1956}
1957
1958/*
1959 Transaction end
1960*/
1961static __inline void _xend()
1962{
1963#if KMP_OS_WINDOWS
1964 __asm {
1965 _emit 0x0f
1966 _emit 0x01
1967 _emit 0xd5
1968 }
1969#else
1970 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1971#endif
1972}
1973
1974/*
 1975 This is a macro; the argument must be a single byte constant which
1976 can be evaluated by the inline assembler, since it is emitted as a
1977 byte into the assembly code.
1978*/
1979#if KMP_OS_WINDOWS
1980#define _xabort(ARG) \
1981 _asm _emit 0xc6 \
1982 _asm _emit 0xf8 \
1983 _asm _emit ARG
1984#else
1985#define _xabort(ARG) \
1986 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1987#endif
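// A hedged sketch (kept out of the build) of the canonical RTM pattern these
// three primitives implement; the adaptive lock code below follows the same
// shape. The function name and the placeholder condition are illustrative only.
#if 0
static void
__kmp_rtm_pattern_sketch()
{
    kmp_uint32 status = _xbegin();
    if ( status == _XBEGIN_STARTED ) {
        // Transactional path: memory accesses are buffered by the hardware.
        if ( 0 /* e.g. the lock is visibly held */ ) {
            _xabort(0x01)                // explicit abort; code 0x01 lands in bits 31..24 of the status
        }
        _xend();                         // commit the transaction
    }
    else if ( status & SOFT_ABORT_MASK ) {
        // Retry-worthy abort (explicit, conflict, or the hardware suggests retrying).
    }
    else {
        // Hard failure (e.g. capacity, nesting): fall back to a real lock.
    }
}
#endif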
1988
1989//
 1990// Statistics are collected for testing purposes
1991//
1992#if KMP_DEBUG_ADAPTIVE_LOCKS
1993
1994// We accumulate speculative lock statistics when the lock is destroyed.
1995// We keep locks that haven't been destroyed in the liveLocks list
1996// so that we can grab their statistics too.
1997static kmp_adaptive_lock_statistics_t destroyedStats;
1998
1999// To hold the list of live locks.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002000static kmp_adaptive_lock_info_t liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002001
2002// A lock so we can safely update the list of locks.
2003static kmp_bootstrap_lock_t chain_lock;
2004
2005// Initialize the list of stats.
2006void
2007__kmp_init_speculative_stats()
2008{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002009 kmp_adaptive_lock_info_t *lck = &liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010
2011 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2012 lck->stats.next = lck;
2013 lck->stats.prev = lck;
2014
2015 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2016 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2017
2018 __kmp_init_bootstrap_lock( &chain_lock );
2019
2020}
2021
2022// Insert the lock into the circular list
2023static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002024__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002025{
2026 __kmp_acquire_bootstrap_lock( &chain_lock );
2027
2028 lck->stats.next = liveLocks.stats.next;
2029 lck->stats.prev = &liveLocks;
2030
2031 liveLocks.stats.next = lck;
2032 lck->stats.next->stats.prev = lck;
2033
2034 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2035 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2036
2037 __kmp_release_bootstrap_lock( &chain_lock );
2038}
2039
2040static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002041__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042{
2043 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2044 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2045
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002046 kmp_adaptive_lock_info_t * n = lck->stats.next;
2047 kmp_adaptive_lock_info_t * p = lck->stats.prev;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002048
2049 n->stats.prev = p;
2050 p->stats.next = n;
2051}
2052
2053static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002054__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002055{
2056 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2057 __kmp_remember_lock( lck );
2058}
2059
2060static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002061__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002062{
2063 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2064
2065 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2066 t->successfulSpeculations += s->successfulSpeculations;
2067 t->hardFailedSpeculations += s->hardFailedSpeculations;
2068 t->softFailedSpeculations += s->softFailedSpeculations;
2069 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2070 t->lemmingYields += s->lemmingYields;
2071}
2072
2073static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002074__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002075{
2076 kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2077
2078 __kmp_acquire_bootstrap_lock( &chain_lock );
2079
2080 __kmp_add_stats( &destroyedStats, lck );
2081 __kmp_forget_lock( lck );
2082
2083 __kmp_release_bootstrap_lock( &chain_lock );
2084}
2085
2086static float
2087percent (kmp_uint32 count, kmp_uint32 total)
2088{
2089 return (total == 0) ? 0.0: (100.0 * count)/total;
2090}
2091
2092static
2093FILE * __kmp_open_stats_file()
2094{
2095 if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2096 return stdout;
2097
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002098 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002099 char buffer[buffLen];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002100 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002101 (kmp_int32)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00002102 FILE * result = fopen(&buffer[0], "w");
2103
2104 // Maybe we should issue a warning here...
2105 return result ? result : stdout;
2106}
2107
2108void
2109__kmp_print_speculative_stats()
2110{
2111 if (__kmp_user_lock_kind != lk_adaptive)
2112 return;
2113
2114 FILE * statsFile = __kmp_open_stats_file();
2115
2116 kmp_adaptive_lock_statistics_t total = destroyedStats;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002117 kmp_adaptive_lock_info_t *lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002118
2119 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2120 __kmp_add_stats( &total, lck );
2121 }
2122 kmp_adaptive_lock_statistics_t *t = &total;
2123 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2124 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2125 t->softFailedSpeculations;
2126
2127 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2128 fprintf ( statsFile, " Lock parameters: \n"
2129 " max_soft_retries : %10d\n"
2130 " max_badness : %10d\n",
2131 __kmp_adaptive_backoff_params.max_soft_retries,
2132 __kmp_adaptive_backoff_params.max_badness);
2133 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2134 fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2135 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2136 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2137 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2138 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2139 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2140
2141 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2142 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2143 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2144 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2145 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2146 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2147 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2148
2149 if (statsFile != stdout)
2150 fclose( statsFile );
2151}
2152
2153# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2154#else
2155# define KMP_INC_STAT(lck,stat)
2156
2157#endif // KMP_DEBUG_ADAPTIVE_LOCKS
2158
2159static inline bool
2160__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2161{
2162 // It is enough to check that the head_id is zero.
 2163 // We don't need to check the tail as well.
2164 bool res = lck->lk.head_id == 0;
2165
2166 // We need a fence here, since we must ensure that no memory operations
2167 // from later in this thread float above that read.
Jim Cownie181b4bb2013-12-23 17:28:57 +00002168#if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00002169 _mm_mfence();
Jim Cownie181b4bb2013-12-23 17:28:57 +00002170#else
2171 __sync_synchronize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002172#endif
2173
2174 return res;
2175}
2176
2177// Functions for manipulating the badness
2178static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002179__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002180{
2181 // Reset the badness to zero so we eagerly try to speculate again
2182 lck->lk.adaptive.badness = 0;
2183 KMP_INC_STAT(lck,successfulSpeculations);
2184}
2185
2186// Create a bit mask with one more set bit.
2187static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002188__kmp_step_badness( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002189{
2190 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2191 if ( newBadness > lck->lk.adaptive.max_badness) {
2192 return;
2193 } else {
2194 lck->lk.adaptive.badness = newBadness;
2195 }
2196}
2197
2198// Check whether speculation should be attempted.
2199static __inline int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002200__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002201{
2202 kmp_uint32 badness = lck->lk.adaptive.badness;
2203 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2204 int res = (attempts & badness) == 0;
2205 return res;
2206}
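// Worked example of the gating arithmetic above: badness always has the form
// 2^k - 1 (0, 1, 3, 7, ...), so (attempts & badness) == 0 holds for exactly
// one acquire attempt in every badness+1. With badness == 3, for instance,
// speculation is attempted when acquire_attempts is 0, 4, 8, ... and the code
// goes straight to the non-speculative path on the other three of every four.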
2207
2208// Attempt to acquire only the speculative lock.
2209// Does not back off to the non-speculative lock.
2210//
2211static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002212__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002213{
2214 int retries = lck->lk.adaptive.max_soft_retries;
2215
2216 // We don't explicitly count the start of speculation, rather we record
2217 // the results (success, hard fail, soft fail). The sum of all of those
2218 // is the total number of times we started speculation since all
2219 // speculations must end one of those ways.
2220 do
2221 {
2222 kmp_uint32 status = _xbegin();
2223 // Switch this in to disable actual speculation but exercise
2224 // at least some of the rest of the code. Useful for debugging...
2225 // kmp_uint32 status = _XABORT_NESTED;
2226
2227 if (status == _XBEGIN_STARTED )
2228 { /* We have successfully started speculation
2229 * Check that no-one acquired the lock for real between when we last looked
2230 * and now. This also gets the lock cache line into our read-set,
2231 * which we need so that we'll abort if anyone later claims it for real.
2232 */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002233 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002234 {
2235 // Lock is now visibly acquired, so someone beat us to it.
2236 // Abort the transaction so we'll restart from _xbegin with the
2237 // failure status.
2238 _xabort(0x01)
2239 KMP_ASSERT2( 0, "should not get here" );
2240 }
2241 return 1; // Lock has been acquired (speculatively)
2242 } else {
2243 // We have aborted, update the statistics
2244 if ( status & SOFT_ABORT_MASK)
2245 {
2246 KMP_INC_STAT(lck,softFailedSpeculations);
2247 // and loop round to retry.
2248 }
2249 else
2250 {
2251 KMP_INC_STAT(lck,hardFailedSpeculations);
2252 // Give up if we had a hard failure.
2253 break;
2254 }
2255 }
2256 } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2257
2258 // Either we had a hard failure or we didn't succeed softly after
2259 // the full set of attempts, so back off the badness.
2260 __kmp_step_badness( lck );
2261 return 0;
2262}
2263
2264// Attempt to acquire the speculative lock, or back off to the non-speculative one
2265// if the speculative lock cannot be acquired.
2266// We can succeed speculatively, non-speculatively, or fail.
2267static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002268__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002269{
2270 // First try to acquire the lock speculatively
2271 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2272 return 1;
2273
2274 // Speculative acquisition failed, so try to acquire it non-speculatively.
2275 // Count the non-speculative acquire attempt
2276 lck->lk.adaptive.acquire_attempts++;
2277
2278 // Use base, non-speculative lock.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002279 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002280 {
2281 KMP_INC_STAT(lck,nonSpeculativeAcquires);
2282 return 1; // Lock is acquired (non-speculatively)
2283 }
2284 else
2285 {
2286 return 0; // Failed to acquire the lock, it's already visibly locked.
2287 }
2288}
2289
2290static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002291__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002293 char const * const func = "omp_test_lock";
2294 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2295 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002296 }
2297
2298 int retval = __kmp_test_adaptive_lock( lck, gtid );
2299
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002300 if ( retval ) {
2301 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002302 }
2303 return retval;
2304}
2305
2306// Block until we can acquire a speculative, adaptive lock.
2307// We check whether we should be trying to speculate.
2308// If we should be, we check the real lock to see if it is free,
2309// and, if not, pause without attempting to acquire it until it is.
2310// Then we try the speculative acquire.
 2311// This means that although we suffer from lemmings a little (because
 2312// we can't acquire the lock speculatively until the queue of waiting
 2313// threads has cleared), we don't get into a
2314// state where we can never acquire the lock speculatively (because we
2315// force the queue to clear by preventing new arrivals from entering the
2316// queue).
2317// This does mean that when we're trying to break lemmings, the lock
2318// is no longer fair. However OpenMP makes no guarantee that its
2319// locks are fair, so this isn't a real problem.
2320static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002321__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002322{
2323 if ( __kmp_should_speculate( lck, gtid ) )
2324 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002325 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002326 {
2327 if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2328 return;
2329 // We tried speculation and failed, so give up.
2330 }
2331 else
2332 {
2333 // We can't try speculation until the lock is free, so we
 2334 // pause here (without suspending on the queuing lock,
 2335 // to allow it to drain), then try again.
2336 // All other threads will also see the same result for
2337 // shouldSpeculate, so will be doing the same if they
2338 // try to claim the lock from now on.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002339 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002340 {
2341 KMP_INC_STAT(lck,lemmingYields);
2342 __kmp_yield (TRUE);
2343 }
2344
2345 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2346 return;
2347 }
2348 }
2349
2350 // Speculative acquisition failed, so acquire it non-speculatively.
2351 // Count the non-speculative acquire attempt
2352 lck->lk.adaptive.acquire_attempts++;
2353
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002354 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002355 // We have acquired the base lock, so count that.
2356 KMP_INC_STAT(lck,nonSpeculativeAcquires );
2357}
2358
2359static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002360__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002361{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002362 char const * const func = "omp_set_lock";
2363 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2364 KMP_FATAL( LockIsUninitialized, func );
2365 }
2366 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2367 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002368 }
2369
2370 __kmp_acquire_adaptive_lock( lck, gtid );
2371
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002372 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002373}
2374
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002375static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002376__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002378 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002379 { // If the lock doesn't look claimed we must be speculating.
2380 // (Or the user's code is buggy and they're releasing without locking;
2381 // if we had XTEST we'd be able to check that case...)
2382 _xend(); // Exit speculation
2383 __kmp_update_badness_after_success( lck );
2384 }
2385 else
2386 { // Since the lock *is* visibly locked we're not speculating,
2387 // so should use the underlying lock's release scheme.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002388 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002389 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002390 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002391}
2392
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002393static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002394__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002395{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002396 char const * const func = "omp_unset_lock";
2397 KMP_MB(); /* in case another processor initialized lock */
2398 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2399 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002400 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002401 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2402 KMP_FATAL( LockUnsettingFree, func );
2403 }
2404 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2405 KMP_FATAL( LockUnsettingSetByAnother, func );
2406 }
2407 lck->lk.qlk.owner_id = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002408 __kmp_release_adaptive_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002409 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002410}
2411
2412static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002413__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002414{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002415 __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002416 lck->lk.adaptive.badness = 0;
2417 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2418 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2419 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2420#if KMP_DEBUG_ADAPTIVE_LOCKS
2421 __kmp_zero_speculative_stats( &lck->lk.adaptive );
2422#endif
2423 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2424}
2425
2426static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002427__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002428{
2429 __kmp_init_adaptive_lock( lck );
2430}
2431
2432static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002433__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002434{
2435#if KMP_DEBUG_ADAPTIVE_LOCKS
2436 __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2437#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002438 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002439 // Nothing needed for the speculative part.
2440}
2441
2442static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002443__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002444{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002445 char const * const func = "omp_destroy_lock";
2446 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2447 KMP_FATAL( LockIsUninitialized, func );
2448 }
2449 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2450 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002451 }
2452 __kmp_destroy_adaptive_lock( lck );
2453}
2454
2455
2456#endif // KMP_USE_ADAPTIVE_LOCKS
2457
2458
2459/* ------------------------------------------------------------------------ */
2460/* DRDPA ticket locks */
2461/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
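/*
    A short orientation note for the code below: each acquiring thread
    atomically takes a ticket and spins on its own slot of a polling area,
    polls[ticket & mask]; the releasing thread stores the next ticket into the
    slot that the next waiter is watching. While the area is large enough,
    different waiters therefore spin on different cache lines. The polling
    area is grown when more threads are waiting than there are slots, and is
    contracted to a single slot under oversubscription, hence "dynamically
    reconfigurable".
*/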
2462
2463static kmp_int32
2464__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2465{
2466 return TCR_4( lck->lk.owner_id ) - 1;
2467}
2468
2469static inline bool
2470__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2471{
2472 return lck->lk.depth_locked != -1;
2473}
2474
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002475__forceinline static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002476__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2477{
2478 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2479 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2480 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2481 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2482 TCR_PTR(lck->lk.polls); // volatile load
2483
2484#ifdef USE_LOCK_PROFILE
2485 if (TCR_8(polls[ticket & mask].poll) != ticket)
2486 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2487 /* else __kmp_printf( "." );*/
2488#endif /* USE_LOCK_PROFILE */
2489
2490 //
2491 // Now spin-wait, but reload the polls pointer and mask, in case the
2492 // polling area has been reconfigured. Unless it is reconfigured, the
2493 // reloads stay in L1 cache and are cheap.
2494 //
2495 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2496 //
2497 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2498 // and poll to be re-read every spin iteration.
2499 //
2500 kmp_uint32 spins;
2501
2502 KMP_FSYNC_PREPARE(lck);
2503 KMP_INIT_YIELD(spins);
 2504 while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
Jim Cownie5e8470a2013-09-27 10:38:44 +00002505 // If we are oversubscribed,
Alp Toker8f2d3f02014-02-24 10:40:15 +00002506 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002507 // CPU Pause is in the macros for yield.
2508 //
2509 KMP_YIELD(TCR_4(__kmp_nth)
2510 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2511 KMP_YIELD_SPIN(spins);
2512
2513 // Re-read the mask and the poll pointer from the lock structure.
2514 //
2515 // Make certain that "mask" is read before "polls" !!!
2516 //
 2517 // If another thread reconfigures the polling area and updates
2518 // their values, and we get the new value of mask and the old polls
2519 // pointer, we could access memory beyond the end of the old polling
2520 // area.
2521 //
2522 mask = TCR_8(lck->lk.mask); // volatile load
2523 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2524 TCR_PTR(lck->lk.polls); // volatile load
2525 }
2526
2527 //
2528 // Critical section starts here
2529 //
2530 KMP_FSYNC_ACQUIRED(lck);
2531 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2532 ticket, lck));
2533 lck->lk.now_serving = ticket; // non-volatile store
2534
2535 //
2536 // Deallocate a garbage polling area if we know that we are the last
2537 // thread that could possibly access it.
2538 //
2539 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2540 // ticket.
2541 //
2542 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2543 __kmp_free((void *)lck->lk.old_polls);
2544 lck->lk.old_polls = NULL;
2545 lck->lk.cleanup_ticket = 0;
2546 }
2547
2548 //
2549 // Check to see if we should reconfigure the polling area.
2550 // If there is still a garbage polling area to be deallocated from a
2551 // previous reconfiguration, let a later thread reconfigure it.
2552 //
2553 if (lck->lk.old_polls == NULL) {
2554 bool reconfigure = false;
2555 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2556 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2557
2558 if (TCR_4(__kmp_nth)
2559 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2560 //
2561 // We are in oversubscription mode. Contract the polling area
2562 // down to a single location, if that hasn't been done already.
2563 //
2564 if (num_polls > 1) {
2565 reconfigure = true;
2566 num_polls = TCR_4(lck->lk.num_polls);
2567 mask = 0;
2568 num_polls = 1;
2569 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2570 __kmp_allocate(num_polls * sizeof(*polls));
2571 polls[0].poll = ticket;
2572 }
2573 }
2574 else {
2575 //
2576 // We are in under/fully subscribed mode. Check the number of
2577 // threads waiting on the lock. The size of the polling area
2578 // should be at least the number of threads waiting.
2579 //
2580 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2581 if (num_waiting > num_polls) {
2582 kmp_uint32 old_num_polls = num_polls;
2583 reconfigure = true;
2584 do {
2585 mask = (mask << 1) | 1;
2586 num_polls *= 2;
2587 } while (num_polls <= num_waiting);
2588
2589 //
2590 // Allocate the new polling area, and copy the relevant portion
2591 // of the old polling area to the new area. __kmp_allocate()
2592 // zeroes the memory it allocates, and most of the old area is
2593 // just zero padding, so we only copy the release counters.
2594 //
2595 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2596 __kmp_allocate(num_polls * sizeof(*polls));
2597 kmp_uint32 i;
2598 for (i = 0; i < old_num_polls; i++) {
2599 polls[i].poll = old_polls[i].poll;
2600 }
2601 }
2602 }
2603
2604 if (reconfigure) {
2605 //
2606 // Now write the updated fields back to the lock structure.
2607 //
2608 // Make certain that "polls" is written before "mask" !!!
2609 //
2610 // If another thread picks up the new value of mask and the old
 2611 // polls pointer, it could access memory beyond the end of the
2612 // old polling area.
2613 //
2614 // On x86, we need memory fences.
2615 //
2616 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2617 ticket, lck, num_polls));
2618
2619 lck->lk.old_polls = old_polls; // non-volatile store
2620 lck->lk.polls = polls; // volatile store
2621
2622 KMP_MB();
2623
2624 lck->lk.num_polls = num_polls; // non-volatile store
2625 lck->lk.mask = mask; // volatile store
2626
2627 KMP_MB();
2628
2629 //
2630 // Only after the new polling area and mask have been flushed
2631 // to main memory can we update the cleanup ticket field.
2632 //
2633 // volatile load / non-volatile store
2634 //
2635 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2636 }
2637 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002638 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002639}
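// Worked example of the growth step above: with num_polls == 4, mask == 0x3
// and, say, 6 threads waiting, the loop doubles num_polls to 8 and widens the
// mask to 0x7, so tickets that differ by 4 no longer collide on one slot. The
// old polling area is freed only after a ticket >= cleanup_ticket is served,
// i.e. once no in-flight waiter can still be spinning on it.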
2640
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002641int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002642__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2643{
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002644 return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002645}
2646
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002647static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002648__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2649{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002650 char const * const func = "omp_set_lock";
2651 if ( lck->lk.initialized != lck ) {
2652 KMP_FATAL( LockIsUninitialized, func );
2653 }
2654 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2655 KMP_FATAL( LockNestableUsedAsSimple, func );
2656 }
2657 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2658 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002659 }
2660
2661 __kmp_acquire_drdpa_lock( lck, gtid );
2662
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002663 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002664 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002665}
2666
2667int
2668__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2669{
2670 //
2671 // First get a ticket, then read the polls pointer and the mask.
2672 // The polls pointer must be read before the mask!!! (See above)
2673 //
2674 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2675 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2676 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2677 TCR_PTR(lck->lk.polls); // volatile load
2678 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2679 if (TCR_8(polls[ticket & mask].poll) == ticket) {
2680 kmp_uint64 next_ticket = ticket + 1;
2681 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2682 ticket, next_ticket)) {
2683 KMP_FSYNC_ACQUIRED(lck);
2684 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2685 ticket, lck));
2686 lck->lk.now_serving = ticket; // non-volatile store
2687
2688 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002689 // Since no threads are waiting, there is no possibility that
Jim Cownie5e8470a2013-09-27 10:38:44 +00002690 // we would want to reconfigure the polling area. We might
2691 // have the cleanup ticket value (which says that it is now
2692 // safe to deallocate old_polls), but we'll let a later thread
2693 // which calls __kmp_acquire_lock do that - this routine
 2694 // isn't supposed to block, and we would risk blocking if we
2695 // called __kmp_free() to do the deallocation.
2696 //
2697 return TRUE;
2698 }
2699 }
2700 return FALSE;
2701}
2702
2703static int
2704__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2705{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002706 char const * const func = "omp_test_lock";
2707 if ( lck->lk.initialized != lck ) {
2708 KMP_FATAL( LockIsUninitialized, func );
2709 }
2710 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2711 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002712 }
2713
2714 int retval = __kmp_test_drdpa_lock( lck, gtid );
2715
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002716 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002717 lck->lk.owner_id = gtid + 1;
2718 }
2719 return retval;
2720}
2721
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002722int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002723__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2724{
2725 //
2726 // Read the ticket value from the lock data struct, then the polls
2727 // pointer and the mask. The polls pointer must be read before the
2728 // mask!!! (See above)
2729 //
2730 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2731 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2732 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2733 TCR_PTR(lck->lk.polls); // volatile load
2734 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2735 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2736 ticket - 1, lck));
2737 KMP_FSYNC_RELEASING(lck);
2738 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002739 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002740}
2741
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002742static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002743__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2744{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002745 char const * const func = "omp_unset_lock";
2746 KMP_MB(); /* in case another processor initialized lock */
2747 if ( lck->lk.initialized != lck ) {
2748 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002749 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002750 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2751 KMP_FATAL( LockNestableUsedAsSimple, func );
2752 }
2753 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2754 KMP_FATAL( LockUnsettingFree, func );
2755 }
2756 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2757 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2758 KMP_FATAL( LockUnsettingSetByAnother, func );
2759 }
2760 lck->lk.owner_id = 0;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002761 return __kmp_release_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002762}
2763
2764void
2765__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2766{
2767 lck->lk.location = NULL;
2768 lck->lk.mask = 0;
2769 lck->lk.num_polls = 1;
2770 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2771 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2772 lck->lk.cleanup_ticket = 0;
2773 lck->lk.old_polls = NULL;
2774 lck->lk.next_ticket = 0;
2775 lck->lk.now_serving = 0;
2776 lck->lk.owner_id = 0; // no thread owns the lock.
2777 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2778 lck->lk.initialized = lck;
2779
2780 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2781}
2782
2783static void
2784__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2785{
2786 __kmp_init_drdpa_lock( lck );
2787}
2788
2789void
2790__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2791{
2792 lck->lk.initialized = NULL;
2793 lck->lk.location = NULL;
2794 if (lck->lk.polls != NULL) {
2795 __kmp_free((void *)lck->lk.polls);
2796 lck->lk.polls = NULL;
2797 }
2798 if (lck->lk.old_polls != NULL) {
2799 __kmp_free((void *)lck->lk.old_polls);
2800 lck->lk.old_polls = NULL;
2801 }
2802 lck->lk.mask = 0;
2803 lck->lk.num_polls = 0;
2804 lck->lk.cleanup_ticket = 0;
2805 lck->lk.next_ticket = 0;
2806 lck->lk.now_serving = 0;
2807 lck->lk.owner_id = 0;
2808 lck->lk.depth_locked = -1;
2809}
2810
2811static void
2812__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2813{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002814 char const * const func = "omp_destroy_lock";
2815 if ( lck->lk.initialized != lck ) {
2816 KMP_FATAL( LockIsUninitialized, func );
2817 }
2818 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2819 KMP_FATAL( LockNestableUsedAsSimple, func );
2820 }
2821 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2822 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002823 }
2824 __kmp_destroy_drdpa_lock( lck );
2825}
2826
2827
2828//
2829// nested drdpa ticket locks
2830//
2831
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002832int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002833__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2834{
2835 KMP_DEBUG_ASSERT( gtid >= 0 );
2836
2837 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2838 lck->lk.depth_locked += 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002839 return KMP_LOCK_ACQUIRED_NEXT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002840 }
2841 else {
2842 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2843 KMP_MB();
2844 lck->lk.depth_locked = 1;
2845 KMP_MB();
2846 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002847 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002848 }
2849}
2850
2851static void
2852__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2853{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002854 char const * const func = "omp_set_nest_lock";
2855 if ( lck->lk.initialized != lck ) {
2856 KMP_FATAL( LockIsUninitialized, func );
2857 }
2858 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2859 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002860 }
2861 __kmp_acquire_nested_drdpa_lock( lck, gtid );
2862}
2863
2864int
2865__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2866{
2867 int retval;
2868
2869 KMP_DEBUG_ASSERT( gtid >= 0 );
2870
2871 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2872 retval = ++lck->lk.depth_locked;
2873 }
2874 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2875 retval = 0;
2876 }
2877 else {
2878 KMP_MB();
2879 retval = lck->lk.depth_locked = 1;
2880 KMP_MB();
2881 lck->lk.owner_id = gtid + 1;
2882 }
2883 return retval;
2884}
2885
2886static int
2887__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2888{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002889 char const * const func = "omp_test_nest_lock";
2890 if ( lck->lk.initialized != lck ) {
2891 KMP_FATAL( LockIsUninitialized, func );
2892 }
2893 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2894 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002895 }
2896 return __kmp_test_nested_drdpa_lock( lck, gtid );
2897}
2898
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002899int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002900__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2901{
2902 KMP_DEBUG_ASSERT( gtid >= 0 );
2903
2904 KMP_MB();
2905 if ( --(lck->lk.depth_locked) == 0 ) {
2906 KMP_MB();
2907 lck->lk.owner_id = 0;
2908 __kmp_release_drdpa_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002909 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002910 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002911 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002912}
2913
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002914static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002915__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2916{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002917 char const * const func = "omp_unset_nest_lock";
2918 KMP_MB(); /* in case another processor initialized lock */
2919 if ( lck->lk.initialized != lck ) {
2920 KMP_FATAL( LockIsUninitialized, func );
2921 }
2922 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2923 KMP_FATAL( LockSimpleUsedAsNestable, func );
2924 }
2925 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2926 KMP_FATAL( LockUnsettingFree, func );
2927 }
2928 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2929 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002930 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002931 return __kmp_release_nested_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002932}
2933
2934void
2935__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2936{
2937 __kmp_init_drdpa_lock( lck );
2938 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2939}
2940
2941static void
2942__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2943{
2944 __kmp_init_nested_drdpa_lock( lck );
2945}
2946
2947void
2948__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2949{
2950 __kmp_destroy_drdpa_lock( lck );
2951 lck->lk.depth_locked = 0;
2952}
2953
2954static void
2955__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2956{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002957 char const * const func = "omp_destroy_nest_lock";
2958 if ( lck->lk.initialized != lck ) {
2959 KMP_FATAL( LockIsUninitialized, func );
2960 }
2961 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2962 KMP_FATAL( LockSimpleUsedAsNestable, func );
2963 }
2964 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2965 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002966 }
2967 __kmp_destroy_nested_drdpa_lock( lck );
2968}
2969
2970
2971//
2972// access functions to fields which don't exist for all lock kinds.
2973//
2974
2975static int
2976__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2977{
2978 return lck == lck->lk.initialized;
2979}
2980
2981static const ident_t *
2982__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2983{
2984 return lck->lk.location;
2985}
2986
2987static void
2988__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2989{
2990 lck->lk.location = loc;
2991}
2992
2993static kmp_lock_flags_t
2994__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2995{
2996 return lck->lk.flags;
2997}
2998
2999static void
3000__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3001{
3002 lck->lk.flags = flags;
3003}
3004
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003005#if KMP_USE_DYNAMIC_LOCK
3006
3007// Definitions of lock hints.
3008# ifndef __OMP_H
3009typedef enum kmp_lock_hint_t {
3010 kmp_lock_hint_none = 0,
3011 kmp_lock_hint_contended,
3012 kmp_lock_hint_uncontended,
3013 kmp_lock_hint_nonspeculative,
3014 kmp_lock_hint_speculative,
3015 kmp_lock_hint_adaptive,
3016} kmp_lock_hint_t;
3017# endif
3018
 3019// Direct lock initializers. Each simply writes a tag to the low 8 bits of the lock word.
3020#define expand_init_lock(l, a) \
3021static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003022 *lck = KMP_LOCK_FREE(l); \
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003023 KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \
3024}
3025FOREACH_D_LOCK(expand_init_lock, 0)
3026#undef expand_init_lock
3027
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003028#if KMP_HAS_HLE
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003029
3030// HLE lock functions - imported from the testbed runtime.
3031#if KMP_MIC
3032# define machine_pause() _mm_delay_32(10) // TODO: find the right argument
3033#else
3034# define machine_pause() _mm_pause()
3035#endif
3036#define HLE_ACQUIRE ".byte 0xf2;"
3037#define HLE_RELEASE ".byte 0xf3;"
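// 0xf2 and 0xf3 are the XACQUIRE and XRELEASE legacy prefixes: on HLE-capable
// hardware the xchg/mov they decorate start and end a hardware lock elision
// region, while on older processors the prefixes are ignored and the code
// degrades gracefully to an ordinary test-and-set spin lock.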
3038
3039static inline kmp_uint32
3040swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3041{
3042 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3043 : "+r"(v), "+m"(*p)
3044 :
3045 : "memory");
3046 return v;
3047}
3048
3049static void
3050__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3051{
3052 *lck = 0;
3053}
3054
3055static void
3056__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3057{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003058 // Use gtid for KMP_LOCK_BUSY if necessary
3059 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003060 int delay = 1;
3061 do {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003062 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003063 for (int i = delay; i != 0; --i)
3064 machine_pause();
3065 delay = ((delay << 1) | 1) & 7;
3066 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003067 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003068 }
3069}
3070
3071static void
3072__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3073{
3074 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3075}
3076
3077static void
3078__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3079{
3080 __asm__ volatile(HLE_RELEASE "movl %1,%0"
3081 : "=m"(*lck)
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003082 : "r"(KMP_LOCK_FREE(hle))
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003083 : "memory");
3084}
3085
3086static void
3087__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3088{
3089 __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3090}
3091
3092static int
3093__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3094{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003095 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003096}
3097
3098static int
3099__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3100{
3101 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3102}
3103
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003104#endif // KMP_HAS_HLE
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003105
3106// Entry functions for indirect locks (first element of direct_*_ops[]).
3107static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3108static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3109static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3110static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3111static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3112static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3113static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3114static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3115
3116//
3117// Jump tables for the direct lock functions.
3118// Only the odd entries are filled in; that avoids the need to shift out the low bit.
3119//
3120#define expand_func0(l, op) 0,op##_##l##_##lock,
3121void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3122 = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) };
3123
3124#define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock,
3125void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *)
3126 = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) };
3127
3128// Differentiates *lock and *lock_with_checks.
3129#define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3130#define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003131static void (*direct_set_tab[][KMP_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003132 = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) },
3133 { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } };
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003134static void (*direct_unset_tab[][KMP_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003135 = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) },
3136 { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } };
3137
3138#define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3139#define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003140static int (*direct_test_tab[][KMP_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003141 = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) },
3142 { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } };
3143
3144// Exposes only one set of jump tables (*lock or *lock_with_checks).
3145void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3146void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3147int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
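// How dispatch presumably reaches these tables: a direct lock keeps an odd
// tag in the low bits of its lock word, while an indirect lock stores an even
// value (index << 1, see __kmp_allocate_indirect_lock below), so a cleared
// low bit routes the call to entry 0 -- the indirect functions. The exact
// tag-extraction macros live in kmp_lock.h and are not shown here.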
3148
3149//
3150// Jump tables for the indirect lock functions.
3151//
3152#define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3153void (*__kmp_indirect_init_ops[])(kmp_user_lock_p)
3154 = { FOREACH_I_LOCK(expand_func4, init) };
3155void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p)
3156 = { FOREACH_I_LOCK(expand_func4, destroy) };
3157
3158// Differentiates *lock and *lock_with_checks.
3159#define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3160#define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003161static void (*indirect_set_tab[][KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003162 = { { FOREACH_I_LOCK(expand_func5, acquire) },
3163 { FOREACH_I_LOCK(expand_func5c, acquire) } };
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003164static void (*indirect_unset_tab[][KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003165 = { { FOREACH_I_LOCK(expand_func5, release) },
3166 { FOREACH_I_LOCK(expand_func5c, release) } };
3167
3168#define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3169#define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003170static int (*indirect_test_tab[][KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003171 = { { FOREACH_I_LOCK(expand_func6, test) },
3172 { FOREACH_I_LOCK(expand_func6c, test) } };
3173
3174// Exposes only one set of jump tables (*lock or *lock_with_checks).
3175void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0;
3176void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0;
3177int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0;
3178
3179// Lock index table.
3180kmp_indirect_lock_t **__kmp_indirect_lock_table;
3181kmp_lock_index_t __kmp_indirect_lock_table_size;
3182kmp_lock_index_t __kmp_indirect_lock_table_next;
3183
3184// Size of indirect locks.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003185static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003186 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3187#if KMP_USE_ADAPTIVE_LOCKS
3188 sizeof(kmp_adaptive_lock_t),
3189#endif
3190 sizeof(kmp_drdpa_lock_t),
3191 sizeof(kmp_tas_lock_t),
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003192#if KMP_HAS_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003193 sizeof(kmp_futex_lock_t),
3194#endif
3195 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3196 sizeof(kmp_drdpa_lock_t)
3197};
3198
3199// Jump tables for lock accessor/modifier.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003200void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3201void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3202const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3203kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003204
3205// Use different lock pools for different lock types.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003206static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003207
3208// Inserts the given lock pointer into the lock table and returns its index.
3209kmp_lock_index_t
3210__kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
3211{
3212 kmp_lock_index_t next = __kmp_indirect_lock_table_next;
3213 // Check capacity and double the size if required
3214 if (next >= __kmp_indirect_lock_table_size) {
3215 kmp_lock_index_t i;
3216 kmp_lock_index_t size = __kmp_indirect_lock_table_size;
3217 kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
3218 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003219 KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003220 __kmp_free(old_table);
3221 __kmp_indirect_lock_table_size = 2*next;
3222 }
3223 // Insert lck into the table and return its index.
3224 __kmp_indirect_lock_table[next] = lck;
3225 __kmp_indirect_lock_table_next++;
3226 return next;
3227}
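// Growth sketch: the table starts at 1024 entries (see
// __kmp_init_indirect_lock_table below) and doubles whenever it fills, so its
// capacity runs 1024, 2048, 4096, ...; the old array is copied into the new
// one and then freed.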
3228
3229// User lock allocator for dynamically dispatched locks.
3230kmp_indirect_lock_t *
3231__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3232{
3233 kmp_indirect_lock_t *lck;
3234 kmp_lock_index_t idx;
3235
3236 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3237
3238 if (__kmp_indirect_lock_pool[tag] != NULL) {
3239 lck = __kmp_indirect_lock_pool[tag];
3240 if (OMP_LOCK_T_SIZE < sizeof(void *))
3241 idx = lck->lock->pool.index;
3242 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3243 } else {
3244 lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
3245 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3246 if (OMP_LOCK_T_SIZE < sizeof(void *))
3247 idx = __kmp_insert_indirect_lock(lck);
3248 }
3249
3250 __kmp_release_lock(&__kmp_global_lock, gtid);
3251
3252 lck->type = tag;
3253
3254 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3255 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3256 } else {
3257 *((kmp_indirect_lock_t **)user_lock) = lck;
3258 }
3259
3260 return lck;
3261}
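// A usage sketch, for illustration only (the real callers are the
// KMP_INIT_I_LOCK path and the OpenMP entry points elsewhere in the runtime);
// locktag_ticket is one of the indirect lock tags used by init_lock_func
// further below.
#if 0
static void example_make_indirect_lock(void **user_lock, kmp_int32 gtid)
{
    kmp_indirect_lock_t *l =
        __kmp_allocate_indirect_lock(user_lock, gtid, locktag_ticket);
    KMP_I_LOCK_FUNC(l, init)(l->lock); // same pattern as __kmp_init_indirect_lock
}
#endif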
3262
3263// User lock lookup for dynamically dispatched locks.
3264static __forceinline
3265kmp_indirect_lock_t *
3266__kmp_lookup_indirect_lock(void **user_lock, const char *func)
3267{
3268 if (__kmp_env_consistency_check) {
3269 kmp_indirect_lock_t *lck = NULL;
3270 if (user_lock == NULL) {
3271 KMP_FATAL(LockIsUninitialized, func);
3272 }
3273 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003274 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003275 if (idx < 0 || idx >= __kmp_indirect_lock_table_size) {
3276 KMP_FATAL(LockIsUninitialized, func);
3277 }
3278 lck = __kmp_indirect_lock_table[idx];
3279 } else {
3280 lck = *((kmp_indirect_lock_t **)user_lock);
3281 }
3282 if (lck == NULL) {
3283 KMP_FATAL(LockIsUninitialized, func);
3284 }
3285 return lck;
3286 } else {
3287 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003288 return __kmp_indirect_lock_table[KMP_EXTRACT_I_INDEX(user_lock)];
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003289 } else {
3290 return *((kmp_indirect_lock_t **)user_lock);
3291 }
3292 }
3293}
3294
3295static void
3296__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3297{
3298#if KMP_USE_ADAPTIVE_LOCKS
3299 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3300 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3301 seq = lockseq_queuing;
3302 }
3303#endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003304 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003305 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003306 KMP_I_LOCK_FUNC(l, init)(l->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003307 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
3308}
3309
3310static void
3311__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3312{
3313 kmp_uint32 gtid = __kmp_entry_gtid();
3314 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003315 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003316 kmp_indirect_locktag_t tag = l->type;
3317
3318 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3319
3320 // Use the base lock's space to keep the pool chain.
3321 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3322 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003323 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003324 }
3325 __kmp_indirect_lock_pool[tag] = l;
3326
3327 __kmp_release_lock(&__kmp_global_lock, gtid);
3328}
3329
3330static void
3331__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3332{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003333 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3334 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003335}
3336
3337static void
3338__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3339{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003340 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3341 KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003342}
3343
3344static int
3345__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3346{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003347 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3348 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003349}
3350
3351static void
3352__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3353{
3354 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003355 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003356}
3357
3358static void
3359__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3360{
3361 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003362 KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003363}
3364
3365static int
3366__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3367{
3368 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003369 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003370}
3371
3372kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3373
3374// Initialize a hinted lock.
3375void
3376__kmp_init_lock_hinted(void **lock, int hint)
3377{
3378 kmp_dyna_lockseq_t seq;
3379 switch (hint) {
3380 case kmp_lock_hint_uncontended:
3381 seq = lockseq_tas;
3382 break;
3383 case kmp_lock_hint_speculative:
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003384#if KMP_HAS_HLE
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003385 seq = lockseq_hle;
3386#else
3387 seq = lockseq_tas;
3388#endif
3389 break;
3390 case kmp_lock_hint_adaptive:
3391#if KMP_USE_ADAPTIVE_LOCKS
3392 seq = lockseq_adaptive;
3393#else
3394 seq = lockseq_queuing;
3395#endif
3396 break;
3397 // Defaults to queuing locks.
3398 case kmp_lock_hint_contended:
3399 case kmp_lock_hint_nonspeculative:
3400 default:
3401 seq = lockseq_queuing;
3402 break;
3403 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003404 if (KMP_IS_D_LOCK(seq)) {
3405 KMP_INIT_D_LOCK(lock, seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003406#if USE_ITT_BUILD
3407 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
3408#endif
3409 } else {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003410 KMP_INIT_I_LOCK(lock, seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003411#if USE_ITT_BUILD
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003412 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003413 __kmp_itt_lock_creating(ilk->lock, NULL);
3414#endif
3415 }
3416}
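// Caller-side sketch, for illustration only (omp_lock_t comes from omp.h; the
// real entry points that forward here live elsewhere in the runtime):
#if 0
static void example_hinted_lock(omp_lock_t *lk)
{
    // Without HLE support the speculative hint falls back to a TAS lock,
    // as selected by the switch above.
    __kmp_init_lock_hinted((void **)lk, kmp_lock_hint_speculative);
}
#endif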
3417
3418// This is used only in kmp_error.c when consistency checking is on.
3419kmp_int32
3420__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3421{
3422 switch (seq) {
3423 case lockseq_tas:
3424 case lockseq_nested_tas:
3425 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003426#if KMP_HAS_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003427 case lockseq_futex:
3428 case lockseq_nested_futex:
3429 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3430#endif
3431 case lockseq_ticket:
3432 case lockseq_nested_ticket:
3433 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3434 case lockseq_queuing:
3435 case lockseq_nested_queuing:
3436#if KMP_USE_ADAPTIVE_LOCKS
3437 case lockseq_adaptive:
3438 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3439#endif
3440 case lockseq_drdpa:
3441 case lockseq_nested_drdpa:
3442 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3443 default:
3444 return 0;
3445 }
3446}
3447
3448// The value initialized from KMP_LOCK_KIND needs to be translated to its
3449// nested version.
3450void
3451__kmp_init_nest_lock_hinted(void **lock, int hint)
3452{
3453 kmp_dyna_lockseq_t seq;
3454 switch (hint) {
3455 case kmp_lock_hint_uncontended:
3456 seq = lockseq_nested_tas;
3457 break;
3458 // Defaults to queuing locks.
3459 case kmp_lock_hint_contended:
3460 case kmp_lock_hint_nonspeculative:
3461 default:
3462 seq = lockseq_nested_queuing;
3463 break;
3464 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003465 KMP_INIT_I_LOCK(lock, seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003466#if USE_ITT_BUILD
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003467 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003468 __kmp_itt_lock_creating(ilk->lock, NULL);
3469#endif
3470}
3471
3472// Initializes the lock table for indirect locks.
3473static void
3474__kmp_init_indirect_lock_table()
3475{
3476 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
3477 __kmp_indirect_lock_table_size = 1024;
3478 __kmp_indirect_lock_table_next = 0;
3479}
3480
3481#if KMP_USE_ADAPTIVE_LOCKS
3482# define init_lock_func(table, expand) { \
3483 table[locktag_ticket] = expand(ticket); \
3484 table[locktag_queuing] = expand(queuing); \
3485 table[locktag_adaptive] = expand(queuing); \
3486 table[locktag_drdpa] = expand(drdpa); \
3487 table[locktag_nested_ticket] = expand(ticket); \
3488 table[locktag_nested_queuing] = expand(queuing); \
3489 table[locktag_nested_drdpa] = expand(drdpa); \
3490}
3491#else
3492# define init_lock_func(table, expand) { \
3493 table[locktag_ticket] = expand(ticket); \
3494 table[locktag_queuing] = expand(queuing); \
3495 table[locktag_drdpa] = expand(drdpa); \
3496 table[locktag_nested_ticket] = expand(ticket); \
3497 table[locktag_nested_queuing] = expand(queuing); \
3498 table[locktag_nested_drdpa] = expand(drdpa); \
3499}
3500#endif // KMP_USE_ADAPTIVE_LOCKS
3501
3502// Initializes data for dynamic user locks.
3503void
3504__kmp_init_dynamic_user_locks()
3505{
3506 // Select which jump tables (with or without consistency checks) to expose
3507 int offset = (__kmp_env_consistency_check)? 1: 0;
3508 __kmp_direct_set_ops = direct_set_tab[offset];
3509 __kmp_direct_unset_ops = direct_unset_tab[offset];
3510 __kmp_direct_test_ops = direct_test_tab[offset];
3511 __kmp_indirect_set_ops = indirect_set_tab[offset];
3512 __kmp_indirect_unset_ops = indirect_unset_tab[offset];
3513 __kmp_indirect_test_ops = indirect_test_tab[offset];
3514 __kmp_init_indirect_lock_table();
3515
3516 // Initialize lock accessor/modifier
3517 // Could have used a designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
3518#define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3519 init_lock_func(__kmp_indirect_set_location, expand_func);
3520#undef expand_func
3521#define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3522 init_lock_func(__kmp_indirect_set_flags, expand_func);
3523#undef expand_func
3524#define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3525 init_lock_func(__kmp_indirect_get_location, expand_func);
3526#undef expand_func
3527#define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3528 init_lock_func(__kmp_indirect_get_flags, expand_func);
3529#undef expand_func
3530
3531 __kmp_init_user_locks = TRUE;
3532}
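// For reference, one assignment produced by init_lock_func above (the ticket
// case of the first expand_func):
//
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;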
3533
3534// Clean up the lock table.
3535void
3536__kmp_cleanup_indirect_user_locks()
3537{
3538 kmp_lock_index_t i;
3539 int k;
3540
3541 // Clean up locks in the pools first (they were already destroyed before going into the pools).
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003542 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003543 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3544 while (l != NULL) {
3545 kmp_indirect_lock_t *ll = l;
3546 l = (kmp_indirect_lock_t *)l->lock->pool.next;
3547 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3548 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
3549 }
3550 __kmp_free(ll->lock);
3551 __kmp_free(ll);
3552 }
3553 }
3554 // Clean up the remaining undestroyed locks.
3555 for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
3556 kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
3557 if (l != NULL) {
3558 // Locks not destroyed explicitly need to be destroyed here.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003559 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003560 __kmp_free(l->lock);
3561 __kmp_free(l);
3562 }
3563 }
3564 // Free the table
3565 __kmp_free(__kmp_indirect_lock_table);
3566
3567 __kmp_init_user_locks = FALSE;
3568}
3569
3570enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3571int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3572
3573#else // KMP_USE_DYNAMIC_LOCK
3574
Jim Cownie5e8470a2013-09-27 10:38:44 +00003575/* ------------------------------------------------------------------------ */
3576/* user locks
3577 *
3578 * They are implemented as a table of function pointers which are set to the
3579 * lock functions of the appropriate kind, once that has been determined.
3580 */
3581
3582enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3583
3584size_t __kmp_base_user_lock_size = 0;
3585size_t __kmp_user_lock_size = 0;
3586
3587kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00003588int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003589
3590int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00003591int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003592void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3593void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3594void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00003595int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003596
3597int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00003598int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003599void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3600void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3601
3602int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3603const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3604void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3605kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3606void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3607
3608void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3609{
3610 switch ( user_lock_kind ) {
3611 case lk_default:
3612 default:
3613 KMP_ASSERT( 0 );
3614
3615 case lk_tas: {
3616 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3617 __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3618
3619 __kmp_get_user_lock_owner_ =
3620 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3621 ( &__kmp_get_tas_lock_owner );
3622
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003623 if ( __kmp_env_consistency_check ) {
3624 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3625 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3626 }
3627 else {
3628 KMP_BIND_USER_LOCK(tas);
3629 KMP_BIND_NESTED_USER_LOCK(tas);
3630 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003631
3632 __kmp_destroy_user_lock_ =
3633 ( void ( * )( kmp_user_lock_p ) )
3634 ( &__kmp_destroy_tas_lock );
3635
Jim Cownie5e8470a2013-09-27 10:38:44 +00003636 __kmp_is_user_lock_initialized_ =
3637 ( int ( * )( kmp_user_lock_p ) ) NULL;
3638
3639 __kmp_get_user_lock_location_ =
3640 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3641
3642 __kmp_set_user_lock_location_ =
3643 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3644
3645 __kmp_get_user_lock_flags_ =
3646 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3647
3648 __kmp_set_user_lock_flags_ =
3649 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3650 }
3651 break;
3652
Jim Cownie181b4bb2013-12-23 17:28:57 +00003653#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003654
3655 case lk_futex: {
3656 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3657 __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3658
3659 __kmp_get_user_lock_owner_ =
3660 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3661 ( &__kmp_get_futex_lock_owner );
3662
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003663 if ( __kmp_env_consistency_check ) {
3664 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3665 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3666 }
3667 else {
3668 KMP_BIND_USER_LOCK(futex);
3669 KMP_BIND_NESTED_USER_LOCK(futex);
3670 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003671
3672 __kmp_destroy_user_lock_ =
3673 ( void ( * )( kmp_user_lock_p ) )
3674 ( &__kmp_destroy_futex_lock );
3675
Jim Cownie5e8470a2013-09-27 10:38:44 +00003676 __kmp_is_user_lock_initialized_ =
3677 ( int ( * )( kmp_user_lock_p ) ) NULL;
3678
3679 __kmp_get_user_lock_location_ =
3680 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3681
3682 __kmp_set_user_lock_location_ =
3683 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3684
3685 __kmp_get_user_lock_flags_ =
3686 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3687
3688 __kmp_set_user_lock_flags_ =
3689 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3690 }
3691 break;
3692
Jim Cownie181b4bb2013-12-23 17:28:57 +00003693#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003694
3695 case lk_ticket: {
3696 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3697 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3698
3699 __kmp_get_user_lock_owner_ =
3700 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3701 ( &__kmp_get_ticket_lock_owner );
3702
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003703 if ( __kmp_env_consistency_check ) {
3704 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3705 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3706 }
3707 else {
3708 KMP_BIND_USER_LOCK(ticket);
3709 KMP_BIND_NESTED_USER_LOCK(ticket);
3710 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711
3712 __kmp_destroy_user_lock_ =
3713 ( void ( * )( kmp_user_lock_p ) )
3714 ( &__kmp_destroy_ticket_lock );
3715
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716 __kmp_is_user_lock_initialized_ =
3717 ( int ( * )( kmp_user_lock_p ) )
3718 ( &__kmp_is_ticket_lock_initialized );
3719
3720 __kmp_get_user_lock_location_ =
3721 ( const ident_t * ( * )( kmp_user_lock_p ) )
3722 ( &__kmp_get_ticket_lock_location );
3723
3724 __kmp_set_user_lock_location_ =
3725 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3726 ( &__kmp_set_ticket_lock_location );
3727
3728 __kmp_get_user_lock_flags_ =
3729 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3730 ( &__kmp_get_ticket_lock_flags );
3731
3732 __kmp_set_user_lock_flags_ =
3733 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3734 ( &__kmp_set_ticket_lock_flags );
3735 }
3736 break;
3737
3738 case lk_queuing: {
3739 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3740 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3741
3742 __kmp_get_user_lock_owner_ =
3743 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3744 ( &__kmp_get_queuing_lock_owner );
3745
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003746 if ( __kmp_env_consistency_check ) {
3747 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3748 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3749 }
3750 else {
3751 KMP_BIND_USER_LOCK(queuing);
3752 KMP_BIND_NESTED_USER_LOCK(queuing);
3753 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003754
3755 __kmp_destroy_user_lock_ =
3756 ( void ( * )( kmp_user_lock_p ) )
3757 ( &__kmp_destroy_queuing_lock );
3758
Jim Cownie5e8470a2013-09-27 10:38:44 +00003759 __kmp_is_user_lock_initialized_ =
3760 ( int ( * )( kmp_user_lock_p ) )
3761 ( &__kmp_is_queuing_lock_initialized );
3762
3763 __kmp_get_user_lock_location_ =
3764 ( const ident_t * ( * )( kmp_user_lock_p ) )
3765 ( &__kmp_get_queuing_lock_location );
3766
3767 __kmp_set_user_lock_location_ =
3768 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3769 ( &__kmp_set_queuing_lock_location );
3770
3771 __kmp_get_user_lock_flags_ =
3772 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3773 ( &__kmp_get_queuing_lock_flags );
3774
3775 __kmp_set_user_lock_flags_ =
3776 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3777 ( &__kmp_set_queuing_lock_flags );
3778 }
3779 break;
3780
3781#if KMP_USE_ADAPTIVE_LOCKS
3782 case lk_adaptive: {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003783 __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3784 __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003785
3786 __kmp_get_user_lock_owner_ =
3787 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3788 ( &__kmp_get_queuing_lock_owner );
3789
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003790 if ( __kmp_env_consistency_check ) {
3791 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3792 }
3793 else {
3794 KMP_BIND_USER_LOCK(adaptive);
3795 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003796
3797 __kmp_destroy_user_lock_ =
3798 ( void ( * )( kmp_user_lock_p ) )
3799 ( &__kmp_destroy_adaptive_lock );
3800
3801 __kmp_is_user_lock_initialized_ =
3802 ( int ( * )( kmp_user_lock_p ) )
3803 ( &__kmp_is_queuing_lock_initialized );
3804
3805 __kmp_get_user_lock_location_ =
3806 ( const ident_t * ( * )( kmp_user_lock_p ) )
3807 ( &__kmp_get_queuing_lock_location );
3808
3809 __kmp_set_user_lock_location_ =
3810 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3811 ( &__kmp_set_queuing_lock_location );
3812
3813 __kmp_get_user_lock_flags_ =
3814 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3815 ( &__kmp_get_queuing_lock_flags );
3816
3817 __kmp_set_user_lock_flags_ =
3818 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3819 ( &__kmp_set_queuing_lock_flags );
3820
3821 }
3822 break;
3823#endif // KMP_USE_ADAPTIVE_LOCKS
3824
3825 case lk_drdpa: {
3826 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3827 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3828
3829 __kmp_get_user_lock_owner_ =
3830 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3831 ( &__kmp_get_drdpa_lock_owner );
3832
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003833 if ( __kmp_env_consistency_check ) {
3834 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3835 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3836 }
3837 else {
3838 KMP_BIND_USER_LOCK(drdpa);
3839 KMP_BIND_NESTED_USER_LOCK(drdpa);
3840 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003841
3842 __kmp_destroy_user_lock_ =
3843 ( void ( * )( kmp_user_lock_p ) )
3844 ( &__kmp_destroy_drdpa_lock );
3845
Jim Cownie5e8470a2013-09-27 10:38:44 +00003846 __kmp_is_user_lock_initialized_ =
3847 ( int ( * )( kmp_user_lock_p ) )
3848 ( &__kmp_is_drdpa_lock_initialized );
3849
3850 __kmp_get_user_lock_location_ =
3851 ( const ident_t * ( * )( kmp_user_lock_p ) )
3852 ( &__kmp_get_drdpa_lock_location );
3853
3854 __kmp_set_user_lock_location_ =
3855 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3856 ( &__kmp_set_drdpa_lock_location );
3857
3858 __kmp_get_user_lock_flags_ =
3859 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3860 ( &__kmp_get_drdpa_lock_flags );
3861
3862 __kmp_set_user_lock_flags_ =
3863 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3864 ( &__kmp_set_drdpa_lock_flags );
3865 }
3866 break;
3867 }
3868}
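// Sketch of how the vptrs get bound at runtime (the actual selection based on
// KMP_LOCK_KIND happens in the settings code, not shown here):
#if 0
static void example_select_queuing_locks(void)
{
    __kmp_user_lock_kind = lk_queuing;
    __kmp_set_user_lock_vptrs(lk_queuing);
    // From here on, (*__kmp_get_user_lock_owner_)(lck) resolves to
    // __kmp_get_queuing_lock_owner, and similarly for the other vptrs.
}
#endif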
3869
3870
3871// ----------------------------------------------------------------------------
3872// User lock table & lock allocation
3873
3874kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3875kmp_user_lock_p __kmp_lock_pool = NULL;
3876
3877// Lock block-allocation support.
3878kmp_block_of_locks* __kmp_lock_blocks = NULL;
3879int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3880
3881static kmp_lock_index_t
3882__kmp_lock_table_insert( kmp_user_lock_p lck )
3883{
3884 // Assume that kmp_global_lock is held upon entry/exit.
3885 kmp_lock_index_t index;
3886 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3887 kmp_lock_index_t size;
3888 kmp_user_lock_p *table;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003889 // Reallocate lock table.
3890 if ( __kmp_user_lock_table.allocated == 0 ) {
3891 size = 1024;
3892 }
3893 else {
3894 size = __kmp_user_lock_table.allocated * 2;
3895 }
3896 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003897 KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003898 table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
Jonathan Peyton1406f012015-05-22 22:35:51 +00003899 // We cannot free the previous table now, since it may be in use by other
Jim Cownie5e8470a2013-09-27 10:38:44 +00003900 // threads. So save the pointer to the previous table in the first element of the
3901 // new table. All the tables are thus organized into a list, and they can be freed when
3902 // the library is shutting down.
3903 __kmp_user_lock_table.table = table;
3904 __kmp_user_lock_table.allocated = size;
3905 }
3906 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3907 index = __kmp_user_lock_table.used;
3908 __kmp_user_lock_table.table[ index ] = lck;
3909 ++ __kmp_user_lock_table.used;
3910 return index;
3911}
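// Note on the resize above: the old table is not freed; its address is stashed
// in element [0] of the new table, so successive tables form a chain that
// __kmp_cleanup_user_locks() walks and frees at shutdown. Element [0] is
// therefore reserved, and the indices handed out here start at 1.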
3912
3913static kmp_user_lock_p
3914__kmp_lock_block_allocate()
3915{
3916 // Assume that kmp_global_lock is held upon entry/exit.
3917 static int last_index = 0;
3918 if ( ( last_index >= __kmp_num_locks_in_block )
3919 || ( __kmp_lock_blocks == NULL ) ) {
3920 // Restart the index.
3921 last_index = 0;
3922 // Need to allocate a new block.
3923 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3924 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3925 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3926 // Set up the new block.
3927 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3928 new_block->next_block = __kmp_lock_blocks;
3929 new_block->locks = (void *)buffer;
3930 // Publish the new block.
3931 KMP_MB();
3932 __kmp_lock_blocks = new_block;
3933 }
3934 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3935 [ last_index * __kmp_user_lock_size ] ) );
3936 last_index++;
3937 return ret;
3938}
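// Layout of a block produced above: a single allocation holding
// __kmp_num_locks_in_block locks of __kmp_user_lock_size bytes each, followed
// immediately by the kmp_block_of_locks header; new_block->locks points back
// at the start of the lock storage.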
3939
3940//
3941// Get memory for a lock. It may be freshly allocated memory or reused memory
3942// from lock pool.
3943//
3944kmp_user_lock_p
3945__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3946 kmp_lock_flags_t flags )
3947{
3948 kmp_user_lock_p lck;
3949 kmp_lock_index_t index;
3950 KMP_DEBUG_ASSERT( user_lock );
3951
3952 __kmp_acquire_lock( &__kmp_global_lock, gtid );
3953
3954 if ( __kmp_lock_pool == NULL ) {
3955 // Lock pool is empty. Allocate new memory.
3956 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3957 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3958 }
3959 else {
3960 lck = __kmp_lock_block_allocate();
3961 }
3962
3963 // Insert lock in the table so that it can be freed in __kmp_cleanup,
3964 // and debugger has info on all allocated locks.
3965 index = __kmp_lock_table_insert( lck );
3966 }
3967 else {
3968 // Pick up lock from pool.
3969 lck = __kmp_lock_pool;
3970 index = __kmp_lock_pool->pool.index;
3971 __kmp_lock_pool = __kmp_lock_pool->pool.next;
3972 }
3973
3974 //
3975 // We could potentially differentiate between nested and regular locks
3976 // here, and do the lock table lookup for regular locks only.
3977 //
3978 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3979 * ( (kmp_lock_index_t *) user_lock ) = index;
3980 }
3981 else {
3982 * ( (kmp_user_lock_p *) user_lock ) = lck;
3983 }
3984
3985 // Mark the lock if it is a critical section lock.
3986 __kmp_set_user_lock_flags( lck, flags );
3987
3988 __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3989
3990 return lck;
3991}
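// Usage sketch, for illustration only; it assumes __kmp_set_user_lock_vptrs()
// has already bound the function pointers above, and passes 0 for the flags.
#if 0
static void example_create_user_lock(void **user_lock, kmp_int32 gtid)
{
    kmp_user_lock_p lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
    (*__kmp_init_user_lock_with_checks_)(lck);
}
#endif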
3992
3993// Put lock's memory to pool for reusing.
3994void
3995__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3996{
Jim Cownie5e8470a2013-09-27 10:38:44 +00003997 KMP_DEBUG_ASSERT( user_lock != NULL );
3998 KMP_DEBUG_ASSERT( lck != NULL );
3999
4000 __kmp_acquire_lock( & __kmp_global_lock, gtid );
4001
4002 lck->pool.next = __kmp_lock_pool;
4003 __kmp_lock_pool = lck;
4004 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4005 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4006 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4007 lck->pool.index = index;
4008 }
4009
4010 __kmp_release_lock( & __kmp_global_lock, gtid );
4011}
4012
4013kmp_user_lock_p
4014__kmp_lookup_user_lock( void **user_lock, char const *func )
4015{
4016 kmp_user_lock_p lck = NULL;
4017
4018 if ( __kmp_env_consistency_check ) {
4019 if ( user_lock == NULL ) {
4020 KMP_FATAL( LockIsUninitialized, func );
4021 }
4022 }
4023
4024 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4025 kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4026 if ( __kmp_env_consistency_check ) {
4027 if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4028 KMP_FATAL( LockIsUninitialized, func );
4029 }
4030 }
4031 KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4032 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4033 lck = __kmp_user_lock_table.table[index];
4034 }
4035 else {
4036 lck = *( (kmp_user_lock_p *)user_lock );
4037 }
4038
4039 if ( __kmp_env_consistency_check ) {
4040 if ( lck == NULL ) {
4041 KMP_FATAL( LockIsUninitialized, func );
4042 }
4043 }
4044
4045 return lck;
4046}
4047
4048void
4049__kmp_cleanup_user_locks( void )
4050{
4051 //
4052 // Reset the lock pool. Do not worry about the locks in the pool -- we will free
4053 // them while iterating through the lock table (it includes all the locks,
4054 // dead or alive).
4055 //
4056 __kmp_lock_pool = NULL;
4057
4058#define IS_CRITICAL(lck) \
4059 ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4060 ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4061
4062 //
4063 // Loop through lock table, free all locks.
4064 //
4065 // Do not free item [0]; it is reserved for the lock tables list.
4066 //
4067 // FIXME - we are iterating through a list of (pointers to) objects of
4068 // type union kmp_user_lock, but we have no way of knowing whether the
4069 // base type is currently "pool" or whatever the global user lock type
4070 // is.
4071 //
4072 // We are relying on the fact that for all of the user lock types
4073 // (except "tas"), the first field in the lock struct is the "initialized"
4074 // field, which is set to the address of the lock object itself when
4075 // the lock is initialized. When the union is of type "pool", the
4076 // first field is a pointer to the next object in the free list, which
4077 // will not be the same address as the object itself.
4078 //
4079 // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4080 // will fail for "pool" objects on the free list. This must happen as
4081 // the "location" field of real user locks overlaps the "index" field
4082 // of "pool" objects.
4083 //
4084 // It would be better to run through the free list, and remove all "pool"
4085 // objects from the lock table before executing this loop. However,
4086 // "pool" objects do not always have their index field set (only on
4087 // lin_32e), and I don't want to search the lock table for the address
4088 // of every "pool" object on the free list.
4089 //
4090 while ( __kmp_user_lock_table.used > 1 ) {
4091 const ident *loc;
4092
4093 //
4094 // reduce __kmp_user_lock_table.used before freeing the lock,
4095 // so that state of locks is consistent
4096 //
4097 kmp_user_lock_p lck = __kmp_user_lock_table.table[
4098 --__kmp_user_lock_table.used ];
4099
4100 if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4101 ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4102 //
4103 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4104 // initialized AND it is NOT a critical section (user is not
4105 // responsible for destroying criticals) AND we know source
4106 // location to report.
4107 //
4108 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4109 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4110 ( loc->psource != NULL ) ) {
4111 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
Andrey Churbanove8595de2015-02-20 18:19:41 +00004112 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004113 __kmp_str_loc_free( &str_loc);
4114 }
4115
4116#ifdef KMP_DEBUG
4117 if ( IS_CRITICAL( lck ) ) {
4118 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4119 }
4120 else {
4121 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4122 }
4123#endif // KMP_DEBUG
4124
4125 //
4126 // Cleanup internal lock dynamic resources
4127 // (for drdpa locks particularly).
4128 //
4129 __kmp_destroy_user_lock( lck );
4130 }
4131
4132 //
4133 // Free the lock if block allocation of locks is not used.
4134 //
4135 if ( __kmp_lock_blocks == NULL ) {
4136 __kmp_free( lck );
4137 }
4138 }
4139
4140#undef IS_CRITICAL
4141
4142 //
4143 // delete lock table(s).
4144 //
4145 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4146 __kmp_user_lock_table.table = NULL;
4147 __kmp_user_lock_table.allocated = 0;
4148
4149 while ( table_ptr != NULL ) {
4150 //
4151 // In the first element we saved the pointer to the previous
4152 // (smaller) lock table.
4153 //
4154 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4155 __kmp_free( table_ptr );
4156 table_ptr = next;
4157 }
4158
4159 //
4160 // Free buffers allocated for blocks of locks.
4161 //
4162 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4163 __kmp_lock_blocks = NULL;
4164
4165 while ( block_ptr != NULL ) {
4166 kmp_block_of_locks_t *next = block_ptr->next_block;
4167 __kmp_free( block_ptr->locks );
4168 //
4169 // *block_ptr itself was allocated at the end of the locks vector.
4170 //
4171 block_ptr = next;
4172 }
4173
4174 TCW_4(__kmp_init_user_locks, FALSE);
4175}
4176
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00004177#endif // KMP_USE_DYNAMIC_LOCK